From 2e86321219d33b7f9a7a0cf323cf55740a876ebe Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 01:28:36 +0000 Subject: [PATCH 001/123] cvt_roundps,pd_epi32,epu32; cvt_roundepi32,epu32_ps; cvt_roundpd_ps; mm_add,sub,mul,div_round_ss,sd; mm_sqrt_round_ss,sd; mm_scalf_round_ss,sd; mm_fmadd,fmsub,fnmadd,fnmsub_round_ss,sd; mm_cvt_roundss_i32,u32; mm_cvt_roundsd_i32,u32; mm_cvt_roundi32,u32_ss; mm_cvt_roundsd_ss --- crates/core_arch/src/x86/avx512f.rs | 2186 ++++++++++-------------- crates/core_arch/src/x86_64/avx512f.rs | 30 +- 2 files changed, 878 insertions(+), 1338 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7bf8bdeae9..7911157eb2 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -13393,17 +13393,13 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_epi32&expand=1335) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2dq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2dq(a, zero, 0b11111111_11111111, ROUNDING); transmute(r) } @@ -13419,22 +13415,17 @@ pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epi32&expand=1336) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_epi32( +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_epi32( src: __m512i, k: __mmask16, a: __m512, - rounding: i32, ) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let src = src.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2dq(a, src, k, ROUNDING); transmute(r) } @@ -13450,17 +13441,16 @@ pub unsafe fn _mm512_mask_cvt_roundps_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epi32&expand=1337) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_epi32( + k: __mmask16, + a: __m512, +) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2dq(a, zero, k, ROUNDING); transmute(r) } @@ -13476,17 +13466,13 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_epu32&expand=1341) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2udq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2udq(a, zero, 0b11111111_11111111, ROUNDING); transmute(r) } @@ -13502,22 +13488,17 @@ pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epu32&expand=1342) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_epu32( +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_epu32( src: __m512i, k: __mmask16, a: __m512, - rounding: i32, ) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let src = src.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2udq(a, src, k, ROUNDING); transmute(r) } @@ -13533,17 +13514,16 @@ pub unsafe fn _mm512_mask_cvt_roundps_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epu32&expand=1343) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_epu32( + k: __mmask16, + a: __m512, +) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2udq(a, zero, k, ROUNDING); transmute(r) } @@ -13624,17 +13604,13 @@ pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_epi32&expand=1315) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2dq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2dq(a, zero, 0b11111111, ROUNDING); transmute(r) } @@ -13650,22 +13626,17 @@ pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_epi32&expand=1316) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_epi32( +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundpd_epi32( src: __m256i, k: __mmask8, a: __m512d, - rounding: i32, ) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let src = src.as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2dq(a, src, k, ROUNDING); transmute(r) } @@ -13681,17 +13652,16 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundpd_epi32( + k: __mmask8, + a: __m512d, +) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2dq(a, zero, k, ROUNDING); transmute(r) } @@ -13707,17 +13677,13 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_epu32&expand=1321) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_u32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2udq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2udq(a, zero, 0b11111111, ROUNDING); transmute(r) } @@ -13733,22 +13699,17 @@ pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_epu32&expand=1322) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_epu32( +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundpd_epu32( src: __m256i, k: __mmask8, a: __m512d, - rounding: i32, ) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let src = src.as_u32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2udq(a, src, k, ROUNDING); transmute(r) } @@ -13764,17 +13725,16 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundpd_epu32( + k: __mmask8, + a: __m512d, +) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_u32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2udq(a, zero, k, ROUNDING); transmute(r) } @@ -13790,17 +13750,13 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ps&expand=1327) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d) -> __m256 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_ps().as_f32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2ps(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2ps(a, zero, 0b11111111, ROUNDING); transmute(r) } @@ -13816,22 +13772,17 @@ pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ps&expand=1328) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_ps( +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundpd_ps( src: __m256, k: __mmask8, a: __m512d, - rounding: i32, ) -> __m256 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let src = src.as_f32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2ps(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2ps(a, src, k, ROUNDING); transmute(r) } @@ -13847,17 +13798,13 @@ pub unsafe fn _mm512_mask_cvt_roundpd_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ps&expand=1329) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d) -> __m256 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_ps().as_f32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2ps(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2ps(a, zero, k, ROUNDING); transmute(r) } @@ -13873,16 +13820,12 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ps&expand=1294) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtdq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtdq2ps(a, ROUNDING); transmute(r) } @@ -13898,21 +13841,16 @@ pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ps&expand=1295) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundepi32_ps( +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundepi32_ps( src: __m512, k: __mmask16, a: __m512i, - rounding: i32, ) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtdq2ps(a, $imm4) - }; - } - let r: f32x16 = constify_imm4_round!(rounding, call); + let r = vcvtdq2ps(a, ROUNDING); transmute(simd_select_bitmask(k, r, src.as_f32x16())) } @@ -13928,16 +13866,15 @@ pub unsafe fn _mm512_mask_cvt_roundepi32_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundepi32_ps( + k: __mmask16, + a: __m512i, +) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtdq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtdq2ps(a, ROUNDING); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r, zero)) } @@ -13954,16 +13891,12 @@ pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ps&expand=1303) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtudq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtudq2ps(a, ROUNDING); transmute(r) } @@ -13979,21 +13912,16 @@ pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ps&expand=1304) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundepu32_ps( +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundepu32_ps( src: __m512, k: __mmask16, a: __m512i, - rounding: i32, ) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtudq2ps(a, $imm4) - }; - } - let r: f32x16 = constify_imm4_round!(rounding, call); + let r = vcvtudq2ps(a, ROUNDING); transmute(simd_select_bitmask(k, r, src.as_f32x16())) } @@ -14009,16 +13937,15 @@ pub unsafe fn _mm512_mask_cvt_roundepu32_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundepu32_ps( + k: __mmask16, + a: __m512i, +) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtudq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtudq2ps(a, ROUNDING); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r, zero)) } @@ -33519,18 +33446,15 @@ pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_ss&expand=151) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vaddss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33545,24 +33469,20 @@ pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_ss&expand=152) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_add_round_ss( +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_add_round_ss( src: __m128, k: __mmask8, a: 
__m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vaddss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddss(a, b, src, k, ROUNDING); + transmute(r) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33577,18 +33497,19 @@ pub unsafe fn _mm_mask_add_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_ss&expand=153) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_add_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vaddss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddss(a, b, zero, k, ROUNDING); + transmute(r) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -33603,18 +33524,15 @@ pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sd&expand=148) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vaddsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33629,24 +33547,20 @@ pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_Sd&expand=149) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_add_round_sd( +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_add_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vaddsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddsd(a, b, src, k, ROUNDING); + transmute(r) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33661,23 +33575,19 @@ pub unsafe fn _mm_mask_add_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sd&expand=150) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_add_round_sd( +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_add_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vaddsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33692,18 +33602,15 @@ pub unsafe fn _mm_maskz_add_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_ss&expand=5745) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsubss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33718,24 +33625,20 @@ pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_ss&expand=5743) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sub_round_ss( +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sub_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsubss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubss(a, b, src, k, ROUNDING); + transmute(r) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33750,18 +33653,19 @@ pub unsafe fn _mm_mask_sub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_ss&expand=5744) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sub_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsubss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubss(a, b, zero, k, ROUNDING); + transmute(r) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -33776,18 +33680,15 @@ pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_sd&expand=5742) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsubsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33802,24 +33703,20 @@ pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_sd&expand=5740) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sub_round_sd( +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sub_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsubsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubsd(a, b, src, k, ROUNDING); + transmute(r) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33834,23 +33731,19 @@ pub unsafe fn _mm_mask_sub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_sd&expand=5741) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sub_round_sd( +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sub_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsubsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33865,18 +33758,15 @@ pub unsafe fn _mm_maskz_sub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_ss&expand=3946) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmulss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33891,24 +33781,20 @@ pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_ss&expand=3944) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_mul_round_ss( +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_mul_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmulss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulss(a, b, src, k, ROUNDING); + transmute(r) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33923,18 +33809,19 @@ pub unsafe fn _mm_mask_mul_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_ss&expand=3945) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_mul_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmulss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulss(a, b, zero, k, ROUNDING); + transmute(r) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -33949,18 +33836,15 @@ pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_sd&expand=3943) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmulsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33975,24 +33859,20 @@ pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_sd&expand=3941) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_mul_round_sd( +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_mul_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmulsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulsd(a, b, src, k, ROUNDING); + transmute(r) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34007,23 +33887,19 @@ pub unsafe fn _mm_mask_mul_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_sd&expand=3942) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_mul_round_sd( +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_mul_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmulsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34038,18 +33914,15 @@ pub unsafe fn _mm_maskz_mul_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_ss&expand=2174) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vdivss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34064,24 +33937,20 @@ pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_ss&expand=2175) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_div_round_ss( +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_div_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vdivss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivss(a, b, src, k, ROUNDING); + transmute(r) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34096,18 +33965,19 @@ pub unsafe fn _mm_mask_div_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_ss&expand=2176) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_div_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vdivss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivss(a, b, zero, k, ROUNDING); + transmute(r) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -34122,18 +33992,15 @@ pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_sd&expand=2171) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vdivsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34148,24 +34015,20 @@ pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_sd&expand=2172) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_div_round_sd( +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_div_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vdivsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivsd(a, b, src, k, ROUNDING); + transmute(r) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34180,23 +34043,19 @@ pub unsafe fn _mm_mask_div_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_sd&expand=2173) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_div_round_sd( +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_div_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vdivsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34475,18 +34334,15 @@ pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_ss&expand=5383) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34501,24 +34357,20 @@ pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_ss&expand=5381) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sqrt_round_ss( +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sqrt_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtss(a, b, src, k, ROUNDING); + transmute(r) } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34533,18 +34385,19 @@ pub unsafe fn _mm_mask_sqrt_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_ss&expand=5382) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sqrt_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtss(a, b, zero, k, ROUNDING); + transmute(r) } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -34559,18 +34412,15 @@ pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, roundin /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_sd&expand=5380) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34585,24 +34435,20 @@ pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_sd&expand=5378) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sqrt_round_sd( +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sqrt_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtsd(a, b, src, k, ROUNDING); + transmute(r) } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34617,23 +34463,19 @@ pub unsafe fn _mm_mask_sqrt_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_sd&expand=5379) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sqrt_round_sd( +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sqrt_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vsqrtsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -35196,18 +35038,14 @@ pub unsafe fn _mm_maskz_roundscale_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_ss&expand=4895) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vscalefss(a, b, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefss(a, b, zero, 0b11111111, ROUNDING); transmute(r) } @@ -35223,24 +35061,19 @@ pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_ss&expand=4893) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_scalef_round_ss( +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_scalef_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vscalefss(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefss(a, b, src, k, ROUNDING); transmute(r) } @@ -35256,23 +35089,18 @@ pub unsafe fn _mm_mask_scalef_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_ss&expand=4894) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_scalef_round_ss( +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_scalef_round_ss( k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vscalefss(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefss(a, b, zero, k, ROUNDING); transmute(r) } @@ -35288,18 +35116,14 @@ pub unsafe fn _mm_maskz_scalef_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_sd&expand=4892) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vscalefsd(a, b, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefsd(a, b, zero, 0b11111111, ROUNDING); transmute(r) } @@ -35315,24 +35139,18 @@ pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m1 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_sd&expand=4890) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_scalef_round_sd( +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_scalef_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vscalefsd(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefsd(a, b, src, k, ROUNDING); transmute(r) } @@ -35348,23 +35166,18 @@ pub unsafe fn _mm_mask_scalef_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_sd&expand=4891) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_scalef_round_sd( +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_scalef_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vscalefsd(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefsd(a, b, zero, k, ROUNDING); transmute(r) } @@ -35380,19 +35193,15 @@ pub unsafe fn _mm_maskz_scalef_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_ss&expand=2573) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fmadd = constify_imm4_round!(rounding, call); - let r = simd_insert(a, 0, fmadd); + let r = vfmadd132ss(extracta, extractb, extractc, ROUNDING); + let r = simd_insert(a, 0, r); transmute(r) } @@ -35408,25 +35217,20 @@ pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_ss&expand=2574) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmadd_round_ss( +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmadd_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmadd: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f32 = 
simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! call { - ($imm4:expr) => { - vfmadd132ss(fmadd, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132ss(fmadd, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35444,26 +35248,21 @@ pub unsafe fn _mm_mask_fmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_ss&expand=2576) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmadd_round_ss( +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmadd_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmadd: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35481,25 +35280,20 @@ pub unsafe fn _mm_maskz_fmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_ss&expand=2575) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmadd_round_ss( +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmadd_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmadd: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, fmadd, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132ss(extracta, extractb, fmadd, ROUNDING); } let r = simd_insert(c, 0, fmadd); transmute(r) @@ -35517,18 +35311,18 @@ pub unsafe fn _mm_mask3_fmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_sd&expand=2569) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmadd_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fmadd = constify_imm4_round!(rounding, call); + let fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fmadd); transmute(r) } @@ -35545,25 +35339,20 @@ pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_sd&expand=2570) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmadd_round_sd( +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmadd_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmadd: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(fmadd, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132sd(fmadd, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35581,26 +35370,21 @@ pub unsafe fn _mm_mask_fmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_sd&expand=2572) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmadd_round_sd( +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmadd_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmadd: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35618,25 +35402,20 @@ pub unsafe fn _mm_maskz_fmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_Sd&expand=2571) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmadd_round_sd( +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmadd_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmadd: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, fmadd, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132sd(extracta, extractb, fmadd, ROUNDING); } let r = simd_insert(c, 0, fmadd); transmute(r) @@ -35654,19 +35433,15 @@ pub unsafe fn _mm_mask3_fmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_ss&expand=2659) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fmsub = constify_imm4_round!(rounding, call); + let fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fmsub); transmute(r) } @@ -35683,26 +35458,21 @@ pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_ss&expand=2660) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmsub_round_ss( +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmsub_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmsub: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(fmsub, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132ss(fmsub, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35720,27 +35490,22 @@ pub unsafe fn _mm_mask_fmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_ss&expand=2662) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmsub_round_ss( +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmsub_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmsub: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35758,26 +35523,21 @@ pub unsafe fn _mm_maskz_fmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_ss&expand=2661) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmsub_round_ss( +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmsub_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmsub: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc = -fmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fmsub); transmute(r) @@ -35795,19 +35555,19 @@ pub unsafe fn _mm_mask3_fmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_sd&expand=2655) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmsub_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fmsub = constify_imm4_round!(rounding, call); + let fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fmsub); transmute(r) } @@ -35824,26 +35584,21 @@ pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_sd&expand=2656) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmsub_round_sd( +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmsub_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmsub: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(fmsub, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132sd(fmsub, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35861,27 +35616,22 @@ pub unsafe fn _mm_mask_fmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_sd&expand=2658) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmsub_round_sd( +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmsub_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmsub: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35899,26 +35649,21 @@ pub unsafe fn _mm_maskz_fmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_sd&expand=2657) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmsub_round_sd( +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmsub_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmsub: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc = -fmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fmsub); transmute(r) @@ -35936,19 +35681,15 @@ pub unsafe fn _mm_mask3_fmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_ss&expand=2739) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fnmadd = constify_imm4_round!(rounding, call); + let fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmadd); transmute(r) } @@ -35965,26 +35706,21 @@ pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_ss&expand=2740) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmadd_round_ss( +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmadd_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmadd: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36002,27 +35738,22 @@ pub unsafe fn _mm_mask_fnmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_ss&expand=2742) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmadd_round_ss( +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmadd_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmadd: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36040,26 +35771,21 @@ pub unsafe fn _mm_maskz_fnmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_ss&expand=2741) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmadd_round_ss( +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmadd_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmadd: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, fnmadd, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132ss(extracta, extractb, fnmadd, ROUNDING); } let r = simd_insert(c, 0, fnmadd); transmute(r) @@ -36077,19 +35803,19 @@ pub unsafe fn _mm_mask3_fnmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_sd&expand=2735) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmadd_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fnmadd = constify_imm4_round!(rounding, call); + let fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmadd); transmute(r) } @@ -36106,26 +35832,21 @@ pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_sd&expand=2736) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmadd_round_sd( +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmadd_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmadd: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36143,27 +35864,22 @@ pub unsafe fn _mm_mask_fnmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_sd&expand=2738) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmadd_round_sd( +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmadd_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmadd: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36181,26 +35897,21 @@ pub unsafe fn _mm_maskz_fnmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_Sd&expand=2737) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmadd_round_sd( +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmadd_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmadd: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, fnmadd, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132sd(extracta, extractb, fnmadd, ROUNDING); } let r = simd_insert(c, 0, fnmadd); transmute(r) @@ -36218,20 +35929,16 @@ pub unsafe fn _mm_mask3_fnmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_ss&expand=2787) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fnmsub = constify_imm4_round!(rounding, call); + let fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmsub); transmute(r) } @@ -36248,27 +35955,22 @@ pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_ss&expand=2788) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmsub_round_ss( +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmsub_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmsub: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36286,15 +35988,15 @@ pub unsafe fn _mm_mask_fnmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_ss&expand=2790) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmsub_round_ss( +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmsub_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmsub: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); @@ -36302,12 +36004,7 @@ pub unsafe fn _mm_maskz_fnmsub_round_ss( let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36325,27 +36022,22 @@ pub unsafe fn _mm_maskz_fnmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_ss&expand=2789) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmsub_round_ss( +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmsub_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmsub: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc = -fnmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fnmsub); transmute(r) @@ -36363,20 +36055,20 @@ pub unsafe fn _mm_mask3_fnmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_sd&expand=2783) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmsub_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fnmsub = constify_imm4_round!(rounding, call); + let fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmsub); transmute(r) } @@ -36393,27 +36085,22 @@ pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_sd&expand=2784) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmsub_round_sd( +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmsub_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmsub: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36431,15 +36118,15 @@ pub unsafe fn _mm_mask_fnmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_sd&expand=2786) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmsub_round_sd( +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmsub_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmsub: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); @@ -36447,12 +36134,7 @@ pub unsafe fn _mm_maskz_fnmsub_round_sd( let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36470,27 +36152,22 @@ pub unsafe fn _mm_maskz_fnmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_sd&expand=2785) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmsub_round_sd( +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmsub_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmsub: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc = -fnmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fnmsub); transmute(r) @@ -36977,18 +36654,14 @@ pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_ss&expand=1361) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2ss(a, b, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2ss(a, b, zero, 0b11111111, ROUNDING); transmute(r) } @@ -37003,24 +36676,19 @@ pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundsd_ss&expand=1362) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_cvt_roundsd_ss( +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_cvt_roundsd_ss( src: __m128, k: __mmask8, a: __m128, b: __m128d, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2ss(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2ss(a, b, src, k, ROUNDING); transmute(r) } @@ -37035,23 +36703,18 @@ pub unsafe fn _mm_mask_cvt_roundsd_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundsd_ss&expand=1363) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_cvt_roundsd_ss( +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_cvt_roundsd_ss( k: __mmask8, a: __m128, b: __m128d, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2ss(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2ss(a, b, zero, k, ROUNDING); transmute(r) } @@ -37066,16 +36729,12 @@ pub unsafe fn _mm_maskz_cvt_roundsd_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_si32&expand=1374) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_si32(a: __m128) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si(a, ROUNDING); transmute(r) } @@ -37090,16 +36749,12 @@ pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_i32&expand=1369) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_i32(a: __m128) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si(a, ROUNDING); transmute(r) } @@ -37114,16 +36769,12 @@ pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_u32&expand=1376) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_u32(a: __m128, rounding: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_u32(a: __m128) -> u32 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2usi(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2usi(a, ROUNDING); transmute(r) } @@ -37158,16 +36809,12 @@ pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_si32&expand=1359) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si(a, ROUNDING); transmute(r) } @@ -37182,16 +36829,12 @@ pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_i32&expand=1357) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si(a, ROUNDING); transmute(r) } @@ -37206,16 +36849,12 @@ pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvt_roundsd_u32&expand=1364) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d, rounding: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d) -> u32 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2usi(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2usi(a, ROUNDING); transmute(r) } @@ -37251,16 +36890,12 @@ pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundi32_ss&expand=1312) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2ss(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss(a, b, ROUNDING); transmute(r) } @@ -37276,16 +36911,12 @@ pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsi32_ss&expand=1366) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtsi2ss(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss(a, b, ROUNDING); transmute(r) } @@ -37300,16 +36931,12 @@ pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundu32_ss&expand=1378) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtusi2ss(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtusi2ss(a, b, ROUNDING); transmute(r) } @@ -44343,10 +43970,10 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); assert_eq_m512i(r, e); - let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44357,14 +43984,14 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = - _mm512_mask_cvt_roundps_epi32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512i(r, src); - let r = _mm512_mask_cvt_roundps_epi32( + let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44375,12 +44002,13 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvt_roundps_epi32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = 
_mm512_maskz_cvt_roundps_epi32( + let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44391,10 +44019,10 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16); assert_eq_m512i(r, e); - let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44405,14 +44033,14 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = - _mm512_mask_cvt_roundps_epu32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512i(r, src); - let r = _mm512_mask_cvt_roundps_epu32( + let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44423,12 +44051,13 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvt_roundps_epu32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = 
_mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_cvt_roundps_epu32( + let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44437,7 +44066,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundepi32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_ps( 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16., ); @@ -44448,14 +44077,14 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundepi32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); let src = _mm512_set1_ps(0.); - let r = - _mm512_mask_cvt_roundepi32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512(r, src); - let r = _mm512_mask_cvt_roundepi32_ps( + let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_ps( 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0., @@ -44466,12 +44095,13 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundepi32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_maskz_cvt_roundepi32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = 
_mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_cvt_roundepi32_ps( + let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_ps( 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0., @@ -44482,7 +44112,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundepu32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); #[rustfmt::skip] let e = _mm512_setr_ps( 0., 4294967300., 2., 4294967300., @@ -44497,14 +44127,14 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundepu32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); let src = _mm512_set1_ps(0.); - let r = - _mm512_mask_cvt_roundepu32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512(r, src); - let r = _mm512_mask_cvt_roundepu32_ps( + let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); #[rustfmt::skip] let e = _mm512_setr_ps( @@ -44519,12 +44149,13 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundepu32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_maskz_cvt_roundepu32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); 
assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_cvt_roundepu32_ps( + let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); #[rustfmt::skip] let e = _mm512_setr_ps( @@ -52671,7 +52302,7 @@ mod tests { unsafe fn test_mm_add_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 60.); assert_eq_m128(r, e); } @@ -52681,15 +52312,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_add_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_add_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 60.); assert_eq_m128(r, e); @@ -52699,10 +52326,11 @@ mod tests { unsafe fn test_mm_maskz_add_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_maskz_add_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_add_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 60.); assert_eq_m128(r, e); } @@ -52711,7 +52339,7 @@ mod 
tests { unsafe fn test_mm_add_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_add_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 6.); assert_eq_m128d(r, e); } @@ -52721,15 +52349,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_add_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_add_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 6.); assert_eq_m128d(r, e); @@ -52739,10 +52363,11 @@ mod tests { unsafe fn test_mm_maskz_add_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_add_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_add_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., 6.); assert_eq_m128d(r, e); } @@ -52751,7 +52376,7 @@ mod tests { unsafe fn test_mm_sub_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., -20.); assert_eq_m128(r, e); } @@ -52761,15 +52386,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 
2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_sub_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_sub_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., -20.); assert_eq_m128(r, e); @@ -52779,10 +52400,11 @@ mod tests { unsafe fn test_mm_maskz_sub_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_maskz_sub_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_sub_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., -20.); assert_eq_m128(r, e); } @@ -52791,7 +52413,7 @@ mod tests { unsafe fn test_mm_sub_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_sub_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., -2.); assert_eq_m128d(r, e); } @@ -52801,15 +52423,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_sub_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_sub_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | 
_MM_FROUND_NO_EXC, + let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., -2.); assert_eq_m128d(r, e); @@ -52819,10 +52437,11 @@ mod tests { unsafe fn test_mm_maskz_sub_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_sub_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_sub_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., -2.); assert_eq_m128d(r, e); } @@ -52831,7 +52450,7 @@ mod tests { unsafe fn test_mm_mul_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mul_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 800.); assert_eq_m128(r, e); } @@ -52841,15 +52460,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_mul_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_mul_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 800.); assert_eq_m128(r, e); @@ -52859,10 +52474,11 @@ mod tests { unsafe fn test_mm_maskz_mul_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = 
_mm_maskz_mul_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_mul_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 800.); assert_eq_m128(r, e); } @@ -52871,7 +52487,7 @@ mod tests { unsafe fn test_mm_mul_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mul_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); } @@ -52881,15 +52497,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_mul_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_mul_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); @@ -52899,10 +52511,11 @@ mod tests { unsafe fn test_mm_maskz_mul_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_mul_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_mul_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = 
_mm_set_pd(1., 8.); assert_eq_m128d(r, e); } @@ -52911,7 +52524,7 @@ mod tests { unsafe fn test_mm_div_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_div_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 0.5); assert_eq_m128(r, e); } @@ -52921,15 +52534,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_div_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_div_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 0.5); assert_eq_m128(r, e); @@ -52939,10 +52548,11 @@ mod tests { unsafe fn test_mm_maskz_div_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_maskz_div_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_div_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 0.5); assert_eq_m128(r, e); } @@ -52951,7 +52561,7 @@ mod tests { unsafe fn test_mm_div_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 
b); let e = _mm_set_pd(1., 0.5); assert_eq_m128d(r, e); } @@ -52961,15 +52571,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_div_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_div_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 0.5); assert_eq_m128d(r, e); @@ -52979,10 +52585,11 @@ mod tests { unsafe fn test_mm_maskz_div_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_div_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_div_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., 0.5); assert_eq_m128d(r, e); } @@ -53123,7 +52730,7 @@ mod tests { unsafe fn test_mm_sqrt_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 4.); - let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 2.); assert_eq_m128(r, e); } @@ -53133,15 +52740,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 4.); - let r = _mm_mask_sqrt_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = 
_mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_sqrt_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 2.); assert_eq_m128(r, e); @@ -53151,10 +52754,11 @@ mod tests { unsafe fn test_mm_maskz_sqrt_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 4.); - let r = _mm_maskz_sqrt_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_sqrt_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 2.); assert_eq_m128(r, e); } @@ -53163,7 +52767,7 @@ mod tests { unsafe fn test_mm_sqrt_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_sqrt_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 2.); assert_eq_m128d(r, e); } @@ -53173,15 +52777,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_sqrt_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_sqrt_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 2.); assert_eq_m128d(r, e); @@ -53191,10 +52791,11 @@ mod tests { unsafe fn 
test_mm_maskz_sqrt_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_sqrt_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_sqrt_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., 2.); assert_eq_m128d(r, e); } @@ -53473,7 +53074,7 @@ mod tests { unsafe fn test_mm_scalef_round_ss() { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(3.); - let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 1., 1., 8.); assert_eq_m128(r, e); } @@ -53482,15 +53083,13 @@ mod tests { unsafe fn test_mm_mask_scalef_round_ss() { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(3.); - let r = _mm_mask_scalef_round_ss(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); - let r = _mm_mask_scalef_round_ss( - a, - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, ); let e = _mm_set_ps(1., 1., 1., 8.); assert_eq_m128(r, e); @@ -53500,14 +53099,12 @@ mod tests { unsafe fn test_mm_maskz_scalef_round_ss() { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(3.); - let r = _mm_maskz_scalef_round_ss(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = 
_mm_maskz_scalef_round_ss( - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, ); let e = _mm_set_ps(1., 1., 1., 8.); assert_eq_m128(r, e); @@ -53517,7 +53114,7 @@ mod tests { unsafe fn test_mm_scalef_round_sd() { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(3.); - let r = _mm_scalef_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); } @@ -53526,15 +53123,13 @@ mod tests { unsafe fn test_mm_mask_scalef_round_sd() { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(3.); - let r = _mm_mask_scalef_round_sd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); - let r = _mm_mask_scalef_round_sd( - a, - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, ); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); @@ -53544,14 +53139,12 @@ mod tests { unsafe fn test_mm_maskz_scalef_round_sd() { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(3.); - let r = _mm_maskz_scalef_round_sd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_scalef_round_sd( - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, ); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); @@ -53562,7 +53155,7 @@ mod tests { let a = _mm_set1_ps(1.); 
let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., 5.); assert_eq_m128(r, e); } @@ -53572,14 +53165,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fmadd_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., 5.); assert_eq_m128(r, e); @@ -53590,15 +53181,13 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_maskz_fmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fmadd_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., 5.); assert_eq_m128(r, e); @@ -53609,14 +53198,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask3_fmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fmadd_round_ss( - a, - b, - c, - 0b11111111, - 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., 5.); assert_eq_m128(r, e); @@ -53627,7 +53214,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., 5.); assert_eq_m128d(r, e); } @@ -53637,14 +53224,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fmadd_round_sd( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., 5.); assert_eq_m128d(r, e); @@ -53655,15 +53240,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_maskz_fmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fmadd_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., 5.); assert_eq_m128d(r, e); @@ -53674,14 +53257,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask3_fmadd_round_sd(a, b, c, 0, 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fmadd_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_pd(3., 5.); assert_eq_m128d(r, e); @@ -53692,7 +53273,7 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., -1.); assert_eq_m128(r, e); } @@ -53702,14 +53283,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fmsub_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., -1.); assert_eq_m128(r, e); @@ -53720,15 +53299,13 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_maskz_fmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fmsub_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., -1.); assert_eq_m128(r, e); @@ -53739,14 +53316,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask3_fmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fmsub_round_ss( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., -1.); assert_eq_m128(r, e); @@ -53757,7 +53332,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., -1.); assert_eq_m128d(r, e); } @@ -53767,14 +53342,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fmsub_round_sd( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., -1.); assert_eq_m128d(r, e); @@ -53785,15 +53358,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_maskz_fmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, 
c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fmsub_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., -1.); assert_eq_m128d(r, e); @@ -53804,14 +53375,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask3_fmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fmsub_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_pd(3., -1.); assert_eq_m128d(r, e); @@ -53822,7 +53391,7 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); } @@ -53832,14 +53401,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fnmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fnmadd_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); @@ -53850,16 +53417,13 @@ mod tests { let a = _mm_set1_ps(1.); 
let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_maskz_fnmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fnmadd_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); @@ -53870,15 +53434,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_mask3_fnmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fnmadd_round_ss( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., 1.); assert_eq_m128(r, e); @@ -53889,7 +53450,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fnmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); } @@ -53899,14 +53460,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fnmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fnmadd_round_sd( - a, - 0b11111111, - b, - c, - 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); @@ -53917,16 +53476,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_maskz_fnmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fnmadd_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); @@ -53937,15 +53493,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_mask3_fnmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fnmadd_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_pd(3., 1.); assert_eq_m128d(r, e); @@ -53956,7 +53509,7 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., -5.); assert_eq_m128(r, e); } @@ -53966,14 +53519,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fnmsub_round_ss(a, 0, b, 
c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fnmsub_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., -5.); assert_eq_m128(r, e); @@ -53984,16 +53535,13 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_maskz_fnmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fnmsub_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., -5.); assert_eq_m128(r, e); @@ -54004,15 +53552,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_mask3_fnmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fnmsub_round_ss( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., -5.); assert_eq_m128(r, e); @@ -54023,7 +53568,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fnmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = 
_mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., -5.); assert_eq_m128d(r, e); } @@ -54033,14 +53578,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fnmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fnmsub_round_sd( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., -5.); assert_eq_m128d(r, e); @@ -54051,16 +53594,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_maskz_fnmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fnmsub_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., -5.); assert_eq_m128d(r, e); @@ -54071,15 +53611,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_mask3_fnmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fnmsub_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = 
_mm_set_pd(3., -5.); assert_eq_m128d(r, e); @@ -54299,7 +53836,7 @@ mod tests { unsafe fn test_mm_cvt_roundsd_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b = _mm_set_pd(6., -7.5); - let r = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., -7.5); assert_eq_m128(r, e); } @@ -54308,10 +53845,11 @@ mod tests { unsafe fn test_mm_mask_cvt_roundsd_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b = _mm_set_pd(6., -7.5); - let r = _mm_mask_cvt_roundsd_ss(a, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b); assert_eq_m128(r, a); - let r = - _mm_mask_cvt_roundsd_ss(a, 0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, + ); let e = _mm_set_ps(0., -0.5, 1., -7.5); assert_eq_m128(r, e); } @@ -54320,10 +53858,12 @@ mod tests { unsafe fn test_mm_maskz_cvt_roundsd_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b = _mm_set_pd(6., -7.5); - let r = _mm_maskz_cvt_roundsd_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(0., -0.5, 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_cvt_roundsd_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, + ); let e = _mm_set_ps(0., -0.5, 1., -7.5); assert_eq_m128(r, e); } @@ -54331,7 +53871,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_si32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 
= -1; assert_eq!(r, e); } @@ -54339,7 +53879,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_i32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 = -1; assert_eq!(r, e); } @@ -54347,7 +53887,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_u32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } @@ -54371,7 +53911,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_si32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 = -1; assert_eq!(r, e); } @@ -54379,7 +53919,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_i32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 = -1; assert_eq!(r, e); } @@ -54387,7 +53927,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_u32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } @@ -54412,7 +53952,7 @@ mod tests { unsafe fn test_mm_cvt_roundi32_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i32 = 9; - let r = _mm_cvt_roundi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = 
_mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -54421,7 +53961,7 @@ mod tests { unsafe fn test_mm_cvt_roundsi32_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i32 = 9; - let r = _mm_cvt_roundsi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -54430,7 +53970,7 @@ mod tests { unsafe fn test_mm_cvt_roundu32_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: u32 = 9; - let r = _mm_cvt_roundu32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index caaf3e6d73..ae6202bc73 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -6288,7 +6288,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundpd_ps() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundpd_ps(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); assert_eq_m256(r, e); } @@ -6297,9 +6297,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundpd_ps() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_ps(0.); - let r = _mm512_mask_cvt_roundpd_ps(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m256(r, src); - let r = _mm512_mask_cvt_roundpd_ps(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m256(r, e); } @@ -6307,9 +6307,9 @@ mod tests 
{ #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundpd_ps() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundpd_ps(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m256(r, _mm256_setzero_ps()); - let r = _mm512_maskz_cvt_roundpd_ps(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m256(r, e); } @@ -6317,7 +6317,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundpd_epi32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); assert_eq_m256i(r, e); } @@ -6326,9 +6326,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvt_roundpd_epi32(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvt_roundpd_epi32(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6336,9 +6336,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundpd_epi32(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = 
_mm512_maskz_cvt_roundpd_epi32(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6346,7 +6346,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundpd_epu32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); assert_eq_m256i(r, e); } @@ -6355,9 +6355,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvt_roundpd_epu32(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvt_roundpd_epu32(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6365,9 +6365,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundpd_epu32(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvt_roundpd_epu32(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } From a4a01fec8e75f04542b9158d69316c7df73bd3c5 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 15:46:38 +0000 Subject: [PATCH 
002/123] cvt_roundps_pd; cvt_roundps_ph; cvt_roundph_ps; cvtps_ph; cvtt_roundps,pd_epi32,epu32; mm_max,min_round_ss,sd; mm_getexp_ss,sd; mm_cvt_roundss_sd; cvt_roundss_si32,i32,u32; mm_cvtt_roundsd_si32,i32,u32 --- crates/core_arch/src/x86/avx512f.rs | 905 ++++++++++--------------- crates/core_arch/src/x86_64/avx512f.rs | 30 +- 2 files changed, 375 insertions(+), 560 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7911157eb2..bcd826d700 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -13533,17 +13533,13 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_pd&expand=1347) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_pd(a: __m256) -> __m512d { + static_assert_sae!(SAE); let a = a.as_f32x8(); let zero = _mm512_setzero_pd().as_f64x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2pd(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2pd(a, zero, 0b11111111, SAE); transmute(r) } @@ -13553,22 +13549,17 @@ pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epi32&expand=1336) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_pd( +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_pd( src: __m512d, k: __mmask8, a: __m256, - sae: i32, ) -> __m512d { + static_assert_sae!(SAE); let a = a.as_f32x8(); let src = src.as_f64x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2pd(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2pd(a, src, k, SAE); transmute(r) } @@ -13578,17 +13569,13 @@ pub unsafe fn _mm512_mask_cvt_roundps_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epi32&expand=1337) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256) -> __m512d { + static_assert_sae!(SAE); let a = a.as_f32x8(); let zero = _mm512_setzero_pd().as_f64x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2pd(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2pd(a, zero, k, SAE); transmute(r) } @@ -13956,17 +13943,13 @@ pub unsafe fn _mm512_maskz_cvt_roundepu32_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_ph&expand=1354) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_ph(a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, 0b11111111_11111111) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, 0b11111111_11111111); transmute(r) } @@ -13976,22 +13959,17 @@ pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_ph&expand=1355) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_ph( +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_ph( src: __m256i, k: __mmask16, a: __m512, - sae: i32, ) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_i16x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, src, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, src, k); transmute(r) } @@ -14001,17 +13979,13 @@ pub unsafe fn _mm512_mask_cvt_roundps_ph( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_ph&expand=1356) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, k); transmute(r) } @@ -14126,17 +14100,13 @@ pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_ph&expand=1778) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtps_ph(a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, 0b11111111_11111111) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, 0b11111111_11111111); transmute(r) } @@ -14146,17 +14116,17 @@ pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtps_ph&expand=1779) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtps_ph( + src: __m256i, + k: __mmask16, + a: __m512, +) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_i16x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, src, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, src, k); transmute(r) } @@ -14166,17 +14136,13 @@ pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtps_ph&expand=1780) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, k); transmute(r) } @@ -14286,17 +14252,13 @@ pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_ps&expand=1332) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i) -> __m512 { + static_assert_sae!(SAE); let a = a.as_i16x16(); let zero = _mm512_setzero_ps().as_f32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtph2ps(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtph2ps(a, zero, 0b11111111_11111111, SAE); transmute(r) } @@ -14306,22 +14268,17 @@ pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_ps&expand=1333) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundph_ps( +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundph_ps( src: __m512, k: __mmask16, a: __m256i, - sae: i32, ) -> __m512 { + static_assert_sae!(SAE); let a = a.as_i16x16(); let src = src.as_f32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtph2ps(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtph2ps(a, src, k, SAE); transmute(r) } @@ -14331,17 +14288,13 @@ pub unsafe fn _mm512_mask_cvt_roundph_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_ps&expand=1334) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i, sae: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i) -> __m512 { + static_assert_sae!(SAE); let a = a.as_i16x16(); let zero = _mm512_setzero_ps().as_f32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtph2ps(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtph2ps(a, zero, k, SAE); transmute(r) } @@ -14442,17 +14395,13 @@ pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundps_epi32&expand=1916) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttps2dq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2dq(a, zero, 0b11111111_11111111, SAE); transmute(r) } @@ -14462,22 +14411,17 @@ pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundps_epi32&expand=1917) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundps_epi32( +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundps_epi32( src: __m512i, k: __mmask16, a: __m512, - sae: i32, ) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvttps2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2dq(a, src, k, SAE); transmute(r) } @@ -14487,17 +14431,13 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttps2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2dq(a, zero, k, SAE); transmute(r) } @@ -14507,17 +14447,13 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundps_epu32&expand=1922) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvttps2udq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2udq(a, zero, 0b11111111_11111111, SAE); transmute(r) } @@ -14527,22 +14463,17 @@ pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundps_epu32&expand=1923) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundps_epu32( +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundps_epu32( src: __m512i, k: __mmask16, a: __m512, - sae: i32, ) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttps2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2udq(a, src, k, SAE); transmute(r) } @@ -14552,17 +14483,13 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvttps2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2udq(a, zero, k, SAE); transmute(r) } @@ -14572,17 +14499,13 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundpd_epi32&expand=1904) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttpd2dq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2dq(a, zero, 0b11111111, SAE); transmute(r) } @@ -14592,22 +14515,17 @@ pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( src: __m256i, k: __mmask8, a: __m512d, - sae: i32, ) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let src = src.as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvttpd2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2dq(a, src, k, SAE); transmute(r) } @@ -14617,17 +14535,13 @@ pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttpd2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2dq(a, zero, k, SAE); transmute(r) } @@ -14637,17 +14551,13 @@ pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundpd_epu32&expand=1910) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvttpd2udq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2udq(a, zero, 0b11111111, SAE); transmute(r) } @@ -14657,22 +14567,17 @@ pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundpd_epu32( +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundpd_epu32( src: __m256i, k: __mmask8, a: __m512d, - sae: i32, ) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let src = src.as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttpd2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2udq(a, src, k, SAE); transmute(r) } @@ -14896,17 +14801,13 @@ pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvttpd2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2udq(a, zero, k, SAE); transmute(r) } @@ -34064,18 +33965,15 @@ pub unsafe fn _mm_maskz_div_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_ss&expand=3668) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxss, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxss(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34084,24 +33982,20 @@ pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_ss&expand=3672) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxss, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_max_round_ss( +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_max_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - sae: i32, ) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxss(a, b, src, k, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34110,18 +34004,15 @@ pub unsafe fn _mm_mask_max_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_round_ss&expand=3667) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxss, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxss(a, b, zero, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -34130,18 +34021,15 @@ pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_sd&expand=3665) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxsd(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34150,24 +34038,20 @@ pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_round_sd&expand=3663) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_max_round_sd( +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_max_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxsd(a, b, src, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34176,18 +34060,19 @@ pub unsafe fn _mm_mask_max_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_sd&expand=3670) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_max_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxsd(a, b, zero, k, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34196,18 +34081,15 @@ pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_ss&expand=3782) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminss, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vminss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminss(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34216,24 +34098,20 @@ pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_Ss&expand=3780) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminss, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_min_round_ss( +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_min_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - sae: i32, ) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vminss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminss(a, b, src, k, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34242,18 +34120,15 @@ pub unsafe fn _mm_mask_min_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_ss&expand=3781) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminss, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vminss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminss(a, b, zero, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst , and copy the upper element from a to the upper element of dst.\ @@ -34262,18 +34137,15 @@ pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_sd&expand=3779) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminsd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vminsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminsd(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34282,24 +34154,20 @@ pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_sd&expand=3777) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminsd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_min_round_sd( +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_min_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vminsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminsd(a, b, src, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34308,18 +34176,19 @@ pub unsafe fn _mm_mask_min_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_Sd&expand=3778) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminsd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_min_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vminsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminsd(a, b, zero, k, SAE); + transmute(r) } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34484,18 +34353,14 @@ pub unsafe fn _mm_maskz_sqrt_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_ss&expand=2856) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vgetexpss(a, b, zero, 0b1, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpss(a, b, zero, 0b1, SAE); transmute(r) } @@ -34505,24 +34370,19 @@ pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_ss&expand=2857) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_getexp_round_ss( +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_getexp_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - sae: i32, ) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vgetexpss(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpss(a, b, src, k, SAE); transmute(r) } @@ -34532,18 +34392,18 @@ pub unsafe fn _mm_mask_getexp_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_ss&expand=2858) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_getexp_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vgetexpss(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpss(a, b, zero, k, SAE); transmute(r) } @@ -34553,18 +34413,14 @@ pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_sd&expand=2853) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vgetexpsd(a, b, zero, 0b1, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpsd(a, b, zero, 0b1, SAE); transmute(r) } @@ -34574,24 +34430,19 @@ pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_sd&expand=2854) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_getexp_round_sd( +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_getexp_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vgetexpsd(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpsd(a, b, src, k, SAE); transmute(r) } @@ -34601,18 +34452,18 @@ pub unsafe fn _mm_mask_getexp_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_sd&expand=2855) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_getexp_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vgetexpsd(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpsd(a, b, zero, k, SAE); transmute(r) } @@ -36577,21 +36428,14 @@ pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_sd&expand=1371) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d { - macro_rules! call { - ($imm4:expr) => { - vcvtss2sd( - a.as_f64x2(), - b.as_f32x4(), - _mm_setzero_pd().as_f64x2(), - 0b11111111, - $imm4, - ) - }; - } - let r = constify_imm4_sae!(sae, call); +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128) -> __m128d { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let zero = _mm_setzero_pd().as_f64x2(); + let r = vcvtss2sd(a, b, zero, 0b11111111, SAE); transmute(r) } @@ -36601,24 +36445,19 @@ pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundss_sd&expand=1372) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_cvt_roundss_sd( +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_cvt_roundss_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f32x4(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2sd(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2sd(a, b, src, k, SAE); transmute(r) } @@ -36628,18 +36467,18 @@ pub unsafe fn _mm_mask_cvt_roundss_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundss_sd&expand=1373) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_cvt_roundss_sd( + k: __mmask8, + a: __m128d, + b: __m128, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f32x4(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2sd(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2sd(a, b, zero, k, SAE); transmute(r) } @@ -36970,16 +36809,12 @@ pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_Si32&expand=1936) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { + static_assert_sae!(SAE); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si(a, SAE); transmute(r) } @@ -36989,16 +36824,12 @@ pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_i32&expand=1934) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { + static_assert_sae!(SAE); let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si(a, SAE); transmute(r) } @@ -37008,16 +36839,12 @@ pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_u32&expand=1938) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { + static_assert_sae!(SAE); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2usi(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2usi(a, SAE); transmute(r) } @@ -37047,16 +36874,12 @@ pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si32&expand=1930) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { + static_assert_sae!(SAE); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si(a, SAE); transmute(r) } @@ -37066,16 +36889,12 @@ pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i32&expand=1928) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { + static_assert_sae!(SAE); let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si(a, SAE); transmute(r) } @@ -37085,16 +36904,12 @@ pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_u32&expand=1932) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { + static_assert_sae!(SAE); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2usi(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2usi(a, SAE); transmute(r) } @@ -44170,7 +43985,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundps_ph() { let a = _mm512_set1_ps(1.); - let r = _mm512_cvt_roundps_ph(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi64x( 4323521613979991040, 4323521613979991040, @@ -44184,9 +43999,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundps_ph() { let a = _mm512_set1_ps(1.); let src = _mm256_set1_epi16(0); - let r = _mm512_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvt_roundps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44194,9 +44009,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundps_ph() { let a = _mm512_set1_ps(1.); - let r = 
_mm512_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvt_roundps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44246,7 +44061,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtps_ph() { let a = _mm512_set1_ps(1.); - let r = _mm512_cvtps_ph(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi64x( 4323521613979991040, 4323521613979991040, @@ -44260,9 +44075,9 @@ mod tests { unsafe fn test_mm512_mask_cvtps_ph() { let a = _mm512_set1_ps(1.); let src = _mm256_set1_epi16(0); - let r = _mm512_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvtps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44270,9 +44085,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtps_ph() { let a = _mm512_set1_ps(1.); - let r = _mm512_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvtps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44327,7 +44142,7 @@ mod tests { 4323521613979991040, 4323521613979991040, ); - let r = _mm512_cvt_roundph_ps(a, _MM_FROUND_NO_EXC); + let r = 
_mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a); let e = _mm512_set1_ps(1.); assert_eq_m512(r, e); } @@ -44341,9 +44156,9 @@ mod tests { 4323521613979991040, ); let src = _mm512_set1_ps(0.); - let r = _mm512_mask_cvt_roundph_ps(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m512(r, src); - let r = _mm512_mask_cvt_roundph_ps(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm512_setr_ps( 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., ); @@ -44358,9 +44173,9 @@ mod tests { 4323521613979991040, 4323521613979991040, ); - let r = _mm512_maskz_cvt_roundph_ps(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_cvt_roundph_ps(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm512_setr_ps( 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., ); @@ -44462,7 +44277,7 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a); let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44473,9 +44288,9 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = _mm512_mask_cvtt_roundps_epi32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m512i(r, src); - let r = _mm512_mask_cvtt_roundps_epi32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 
0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -44485,9 +44300,9 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvtt_roundps_epi32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_cvtt_roundps_epi32(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -44497,7 +44312,7 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvtt_roundps_epu32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44508,9 +44323,9 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = _mm512_mask_cvtt_roundps_epu32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m512i(r, src); - let r = _mm512_mask_cvtt_roundps_epu32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -44520,9 +44335,9 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvtt_roundps_epu32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, 
a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_cvtt_roundps_epu32(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -52598,7 +52413,7 @@ mod tests { unsafe fn test_mm_max_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_ps(0., 1., 2., 7.); assert_eq_m128(r, e); } @@ -52607,10 +52422,10 @@ mod tests { unsafe fn test_mm_mask_max_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_mask_max_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); - let r = _mm_mask_max_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 7.); assert_eq_m128(r, e); } @@ -52619,10 +52434,10 @@ mod tests { unsafe fn test_mm_maskz_max_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_maskz_max_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_ps(0., 1., 2., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_max_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 7.); assert_eq_m128(r, e); } @@ -52631,7 +52446,7 @@ mod tests { unsafe fn test_mm_max_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_max_round_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = 
_mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(0., 3.); assert_eq_m128d(r, e); } @@ -52640,10 +52455,10 @@ mod tests { unsafe fn test_mm_mask_max_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_mask_max_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); - let r = _mm_mask_max_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(0., 3.); assert_eq_m128d(r, e); } @@ -52652,10 +52467,10 @@ mod tests { unsafe fn test_mm_maskz_max_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_maskz_max_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(0., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_max_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(0., 3.); assert_eq_m128d(r, e); } @@ -52664,7 +52479,7 @@ mod tests { unsafe fn test_mm_min_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_min_round_ss(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); } @@ -52673,10 +52488,10 @@ mod tests { unsafe fn test_mm_mask_min_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_mask_min_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); - let r = _mm_mask_min_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = 
_mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); } @@ -52685,10 +52500,10 @@ mod tests { unsafe fn test_mm_maskz_min_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_maskz_min_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_ps(0., 1., 2., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_min_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); } @@ -52697,7 +52512,7 @@ mod tests { unsafe fn test_mm_min_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_min_round_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); } @@ -52706,10 +52521,10 @@ mod tests { unsafe fn test_mm_mask_min_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_mask_min_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); - let r = _mm_mask_min_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); } @@ -52718,10 +52533,10 @@ mod tests { unsafe fn test_mm_maskz_min_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_maskz_min_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(0., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_min_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = 
_mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); } @@ -52804,7 +52619,7 @@ mod tests { unsafe fn test_mm_getexp_round_ss() { let a = _mm_set1_ps(2.); let b = _mm_set1_ps(3.); - let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_ps(2., 2., 2., 1.); assert_eq_m128(r, e); } @@ -52813,10 +52628,10 @@ mod tests { unsafe fn test_mm_mask_getexp_round_ss() { let a = _mm_set1_ps(2.); let b = _mm_set1_ps(3.); - let r = _mm_mask_getexp_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_ps(2., 2., 2., 2.); assert_eq_m128(r, e); - let r = _mm_mask_getexp_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_ps(2., 2., 2., 1.); assert_eq_m128(r, e); } @@ -52825,10 +52640,10 @@ mod tests { unsafe fn test_mm_maskz_getexp_round_ss() { let a = _mm_set1_ps(2.); let b = _mm_set1_ps(3.); - let r = _mm_maskz_getexp_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_ps(2., 2., 2., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_getexp_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_ps(2., 2., 2., 1.); assert_eq_m128(r, e); } @@ -52837,7 +52652,7 @@ mod tests { unsafe fn test_mm_getexp_round_sd() { let a = _mm_set1_pd(2.); let b = _mm_set1_pd(3.); - let r = _mm_getexp_round_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); } @@ -52846,10 +52661,10 @@ mod tests { unsafe fn test_mm_mask_getexp_round_sd() { let a = _mm_set1_pd(2.); let b = _mm_set1_pd(3.); - 
let r = _mm_mask_getexp_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_pd(2., 2.); assert_eq_m128d(r, e); - let r = _mm_mask_getexp_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); } @@ -52858,10 +52673,10 @@ mod tests { unsafe fn test_mm_maskz_getexp_round_sd() { let a = _mm_set1_pd(2.); let b = _mm_set1_pd(3.); - let r = _mm_maskz_getexp_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(2., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_getexp_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); } @@ -53804,7 +53619,7 @@ mod tests { unsafe fn test_mm_cvt_roundss_sd() { let a = _mm_set_pd(6., -7.5); let b = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(6., -1.5); assert_eq_m128d(r, e); } @@ -53813,9 +53628,9 @@ mod tests { unsafe fn test_mm_mask_cvt_roundss_sd() { let a = _mm_set_pd(6., -7.5); let b = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_mask_cvt_roundss_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); assert_eq_m128d(r, a); - let r = _mm_mask_cvt_roundss_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(6., -1.5); assert_eq_m128d(r, e); } @@ -53824,10 +53639,10 @@ mod tests { unsafe fn test_mm_maskz_cvt_roundss_sd() { let a = _mm_set_pd(6., -7.5); let b = _mm_set_ps(0., -0.5, 1., -1.5); - let r = 
_mm_maskz_cvt_roundss_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(6., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_cvt_roundss_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(6., -1.5); assert_eq_m128d(r, e); } @@ -53996,7 +53811,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_si32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_si32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_si32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54004,7 +53819,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_i32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_i32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_i32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54012,7 +53827,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_u32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_u32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_u32::<_MM_FROUND_CUR_DIRECTION>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } @@ -54036,7 +53851,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_si32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_si32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54044,7 +53859,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_i32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_i32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54052,7 +53867,7 @@ mod tests { #[simd_test(enable = 
"avx512f")] unsafe fn test_mm_cvtt_roundsd_u32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_u32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_CUR_DIRECTION>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index ae6202bc73..2db8a430d4 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -5090,7 +5090,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvtt_roundpd_epi32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7); assert_eq_m256i(r, e); } @@ -5099,9 +5099,9 @@ mod tests { unsafe fn test_mm512_mask_cvtt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvtt_roundpd_epi32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvtt_roundpd_epi32(src, 0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -5109,9 +5109,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvtt_roundpd_epi32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvtt_roundpd_epi32(0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 
0, 0); assert_eq_m256i(r, e); } @@ -5119,7 +5119,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvtt_roundpd_epu32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); assert_eq_m256i(r, e); } @@ -5128,9 +5128,9 @@ mod tests { unsafe fn test_mm512_mask_cvtt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvtt_roundpd_epu32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvtt_roundpd_epu32(src, 0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -5138,9 +5138,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvtt_roundpd_epu32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvtt_roundpd_epu32(0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6259,7 +6259,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundps_pd() { let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundps_pd(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); assert_eq_m512d(r, e); } @@ -6268,9 
+6268,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundps_pd() { let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm512_set1_pd(0.); - let r = _mm512_mask_cvt_roundps_pd(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m512d(r, src); - let r = _mm512_mask_cvt_roundps_pd(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m512d(r, e); } @@ -6278,9 +6278,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundps_pd() { let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundps_pd(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m512d(r, _mm512_setzero_pd()); - let r = _mm512_maskz_cvt_roundps_pd(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m512d(r, e); } From 01945c1f74691fd5e6f38c04a3e8acf942a909a5 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 17:02:33 +0000 Subject: [PATCH 003/123] shuffle_epi32 --- crates/core_arch/src/x86/avx512f.rs | 137 +++++++++------------------- 1 file changed, 44 insertions(+), 93 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index bcd826d700..c50bd73360 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21523,75 +21523,32 @@ pub unsafe fn _mm_mask2_permutex2var_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_epi32&expand=5150) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] 
//should be vpshufd, but generate vpermilps -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i { - let imm8 = (imm8 & 0xFF) as u8; - let a = a.as_i32x16(); - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr, - $i:expr, - $j:expr, - $k:expr, - $l:expr, - $m:expr, - $n:expr, - $o:expr, - $p:expr - ) => { - simd_shuffle16( - a, - a, - [ - $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, - ], - ) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28), - 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29), - 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30), - _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28), - 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29), - 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30), - _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr, $i: expr, $m: expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12), - 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13), - 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14), - _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15), - } - }; - } - let r: i32x16 = match imm8 & 0x3 { - 0 => shuffle1!(0, 4, 8, 12), - 1 => shuffle1!(1, 5, 9, 13), - 2 => shuffle1!(2, 6, 10, 14), - _ => shuffle1!(3, 7, 11, 15), - }; +#[cfg_attr(test, assert_instr(vpermilps, MASK = 9))] //should be vpshufd +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_shuffle_epi32(a: __m512i) -> __m512i { + static_assert_imm8!(MASK); + let r: i32x16 = simd_shuffle16( + a.as_i32x16(), + a.as_i32x16(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ); transmute(r) } @@ -21600,20 +21557,15 @@ pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_epi32&expand=5148) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_shuffle_epi32( +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_shuffle_epi32( src: __m512i, k: __mmask16, a: __m512i, - imm8: _MM_PERM_ENUM, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_epi32::(a); transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) } @@ -21622,15 +21574,14 @@ pub unsafe fn _mm512_mask_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_epi32&expand=5149) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_shuffle_epi32( + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8!(MASK); + let r = _mm512_shuffle_epi32::(a); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) } @@ -47705,7 +47656,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_shuffle_epi32() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm512_shuffle_epi32(a, _MM_PERM_AADD); + let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a); let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m512i(r, e); } @@ -47713,9 +47664,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_shuffle_epi32() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm512_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD); + let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_shuffle_epi32(a, 0b11111111_11111111, a, _MM_PERM_AADD); + let r = 
_mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a); let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m512i(r, e); } @@ -47723,9 +47674,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_shuffle_epi32() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm512_maskz_shuffle_epi32(0, a, _MM_PERM_AADD); + let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_epi32(0b00000000_11111111, a, _MM_PERM_AADD); + let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a); let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } From bfdba0503b465a62430d5330c3a511b9c67c11e5 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 18:07:40 +0000 Subject: [PATCH 004/123] mm256_shuffle_epi32 --- crates/core_arch/src/x86/avx2.rs | 85 ++++++----------------------- crates/core_arch/src/x86/avx512f.rs | 40 ++++++-------- 2 files changed, 35 insertions(+), 90 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index e1fa8bc9b9..ae15fc6db6 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -2642,74 +2642,25 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32) #[inline] #[target_feature(enable = "avx2")] -#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] -#[rustc_args_required_const(1)] +#[cfg_attr(test, assert_instr(vpermilps, MASK = 9))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i { - // simd_shuffleX requires that its selector parameter be made up of - // constant values, but we can't 
enforce that here. In spirit, we need - // to write a `match` on all possible values of a byte, and for each value, - // hard-code the correct `simd_shuffleX` call using only constants. We - // then hope for LLVM to do the rest. - // - // Of course, that's... awful. So we try to use macros to do it for us. - let imm8 = (imm8 & 0xFF) as u8; - - let a = a.as_i32x8(); - macro_rules! shuffle_done { - ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { - simd_shuffle8( - a, - a, - [ - $x01, - $x23, - $x45, - $x67, - 4 + $x01, - 4 + $x23, - 4 + $x45, - 4 + $x67, - ], - ) - }; - } - macro_rules! shuffle_x67 { - ($x01:expr, $x23:expr, $x45:expr) => { - match (imm8 >> 6) & 0b11 { - 0b00 => shuffle_done!($x01, $x23, $x45, 0), - 0b01 => shuffle_done!($x01, $x23, $x45, 1), - 0b10 => shuffle_done!($x01, $x23, $x45, 2), - _ => shuffle_done!($x01, $x23, $x45, 3), - } - }; - } - macro_rules! shuffle_x45 { - ($x01:expr, $x23:expr) => { - match (imm8 >> 4) & 0b11 { - 0b00 => shuffle_x67!($x01, $x23, 0), - 0b01 => shuffle_x67!($x01, $x23, 1), - 0b10 => shuffle_x67!($x01, $x23, 2), - _ => shuffle_x67!($x01, $x23, 3), - } - }; - } - macro_rules! 
shuffle_x23 { - ($x01:expr) => { - match (imm8 >> 2) & 0b11 { - 0b00 => shuffle_x45!($x01, 0), - 0b01 => shuffle_x45!($x01, 1), - 0b10 => shuffle_x45!($x01, 2), - _ => shuffle_x45!($x01, 3), - } - }; - } - let r: i32x8 = match imm8 & 0b11 { - 0b00 => shuffle_x23!(0), - 0b01 => shuffle_x23!(1), - 0b10 => shuffle_x23!(2), - _ => shuffle_x23!(3), - }; +pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { + static_assert_imm8!(MASK); + let r: i32x8 = simd_shuffle8( + a.as_i32x8(), + a.as_i32x8(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ); transmute(r) } diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index c50bd73360..befa3047c6 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21591,20 +21591,15 @@ pub unsafe fn _mm512_maskz_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_epi32&expand=5145) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_shuffle_epi32( +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_shuffle_epi32( src: __m256i, k: __mmask8, a: __m256i, - imm8: _MM_PERM_ENUM, ) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_epi32::(a); transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) } @@ -21613,15 +21608,14 @@ pub unsafe fn _mm256_mask_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_epi32&expand=5146) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_shuffle_epi32(k: __mmask8, a: __m256i, imm8: _MM_PERM_ENUM) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_shuffle_epi32( + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(MASK); + let r = _mm256_shuffle_epi32::(a); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) } @@ -47684,9 +47678,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_shuffle_epi32() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm256_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD); + let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_shuffle_epi32(a, 0b11111111, a, _MM_PERM_AADD); + let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a); let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m256i(r, e); } @@ -47694,9 +47688,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_shuffle_epi32() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm256_maskz_shuffle_epi32(0, a, _MM_PERM_AADD); + let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); 
assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shuffle_epi32(0b11111111, a, _MM_PERM_AADD); + let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a); let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m256i(r, e); } From 9f45c278806aeed6d04fa8159d45d62791752f35 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 20:48:07 +0000 Subject: [PATCH 005/123] mm512_srai_epi32 --- crates/core_arch/src/x86/avx512f.rs | 100 ++++++++++++---------------- crates/core_arch/src/x86/macros.rs | 16 +++++ 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index befa3047c6..8f3c80e113 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -18338,16 +18338,12 @@ pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5436) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_srai_epi32(a: __m512i) -> __m512i { + static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vpsraid(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpsraid(a, IMM8); transmute(r) } @@ -18356,17 +18352,17 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5434) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_srai_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vpsraid(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + let r = vpsraid(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18374,18 +18370,14 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5435) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { + static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vpsraid(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let r = vpsraid(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21625,20 +21617,15 @@ pub unsafe fn _mm256_maskz_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shuffle_epi32&expand=5142) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_shuffle_epi32( +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_shuffle_epi32( src: __m128i, k: __mmask8, a: __m128i, - imm8: _MM_PERM_ENUM, ) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_shuffle_epi32::<$imm8>(a) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm_shuffle_epi32::(a); transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) } @@ -21647,15 +21634,14 @@ pub unsafe fn _mm_mask_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shuffle_epi32&expand=5143) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i, imm8: _MM_PERM_ENUM) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_shuffle_epi32::<$imm8>(a) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_shuffle_epi32( + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(MASK); + let r = _mm_shuffle_epi32::(a); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) } @@ -46913,7 +46899,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); - let r = _mm512_srai_epi32(a, 2); + let r = _mm512_srai_epi32::<2>(a); let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4); assert_eq_m512i(r, e); } @@ -46921,9 +46907,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); - let r = _mm512_mask_srai_epi32(a, 0, a, 2); + let r = _mm512_mask_srai_epi32::<2>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2); + let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a); let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } @@ -46931,9 +46917,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); - let r = _mm512_maskz_srai_epi32(0, a, 2); + let r = _mm512_maskz_srai_epi32::<2>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2); + let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a); let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } @@ -47698,9 +47684,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn 
test_mm_mask_shuffle_epi32() { let a = _mm_set_epi32(1, 4, 5, 8); - let r = _mm_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD); + let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_shuffle_epi32(a, 0b00001111, a, _MM_PERM_AADD); + let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a); let e = _mm_set_epi32(8, 8, 1, 1); assert_eq_m128i(r, e); } @@ -47708,9 +47694,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_shuffle_epi32() { let a = _mm_set_epi32(1, 4, 5, 8); - let r = _mm_maskz_shuffle_epi32(0, a, _MM_PERM_AADD); + let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shuffle_epi32(0b00001111, a, _MM_PERM_AADD); + let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a); let e = _mm_set_epi32(8, 8, 1, 1); assert_eq_m128i(r, e); } diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index e659ac3da8..ecb7085d18 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -32,6 +32,22 @@ macro_rules! static_assert_sae { }; } +// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is +// out of `bits`-bit range. +pub(crate) struct ValidateConstImmU; +impl ValidateConstImmU { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM < (1 << BITS)) as usize); + }; +} + +#[allow(unused)] +macro_rules! static_assert_imm8u { + ($imm:ident) => { + let _ = $crate::core_arch::x86::macros::ValidateConstImmU::<$imm, 8>::VALID; + }; +} + macro_rules! 
constify_imm6 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] From bc193cdae13d63e375c81f73377b4b18dcdfd8c9 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 21:01:18 +0000 Subject: [PATCH 006/123] fix macro --- crates/core_arch/src/x86/macros.rs | 40 ------------------------------ 1 file changed, 40 deletions(-) diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index ecb7085d18..1c02de24a7 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -48,46 +48,6 @@ macro_rules! static_assert_imm8u { }; } -macro_rules! constify_imm6 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1_1111 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - 3 => $expand!(3), - 4 => $expand!(4), - 5 => $expand!(5), - 6 => $expand!(6), - 7 => $expand!(7), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - 12 => $expand!(12), - 13 => $expand!(13), - 14 => $expand!(14), - 15 => $expand!(15), - 16 => $expand!(16), - 17 => $expand!(17), - 18 => $expand!(18), - 19 => $expand!(19), - 20 => $expand!(20), - 21 => $expand!(21), - 22 => $expand!(22), - 23 => $expand!(23), - 24 => $expand!(24), - 25 => $expand!(25), - 26 => $expand!(26), - 27 => $expand!(27), - 28 => $expand!(28), - 29 => $expand!(29), - 30 => $expand!(30), - _ => $expand!(31), - } - }; -} - #[allow(unused_macros)] macro_rules! 
constify_imm4 { ($imm8:expr, $expand:ident) => { From 29debd5622d4c8f7b4331471e6ef4f73387e70d9 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 23:09:01 +0000 Subject: [PATCH 007/123] shuffle_i32x4 --- crates/core_arch/src/x86/avx512f.rs | 223 +++++++++------------------- 1 file changed, 72 insertions(+), 151 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 8f3c80e113..cdc3f2d003 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21909,78 +21909,34 @@ pub unsafe fn _mm_maskz_shuffle_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i32&expand=5177) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10010101))] //should be vshufi32x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10010101))] //should be vshufi32x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(MASK); let a = a.as_i32x16(); let b = b.as_i32x16(); - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr, - $i:expr, - $j:expr, - $k:expr, - $l:expr, - $m:expr, - $n:expr, - $o:expr, - $p:expr - ) => { - simd_shuffle16( - a, - b, - [ - $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, - ], - ) - }; - } - macro_rules! 
shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27), - _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19), - 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23), - 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27), - _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31), - } - }; - } - macro_rules! shuffle1 { - ($a:expr, $e:expr, $i: expr, $m: expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3), - 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7), - 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11), - _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15), - } - }; - } - let r: i32x16 = match imm8 & 0x3 { - 0 => shuffle1!(0, 1, 2, 3), - 1 => shuffle1!(4, 5, 6, 7), - 2 => shuffle1!(8, 9, 10, 11), - _ => shuffle1!(12, 13, 14, 15), - }; - + let r: i32x16 = simd_shuffle16( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 
0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, + ], + ); transmute(r) } @@ -21989,21 +21945,15 @@ pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i32x&expand=5175) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10110101))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_i32x4( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i32x4::(a, b); transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) } @@ -22012,20 +21962,14 @@ pub unsafe fn _mm512_mask_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i32&expand=5176) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10110101))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_i32x4( k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i32x4::(a, b); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) } @@ -22035,39 +21979,26 @@ pub unsafe fn _mm512_maskz_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_i32x4&expand=5174) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshufi32x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b1001))] //should be vshufi32x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(MASK); let a = a.as_i32x8(); let b = b.as_i32x8(); - macro_rules! shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $b:expr, $c: expr, $d: expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, $c, $d, 8, 9, 10, 11), - _ => shuffle2!($a, $b, $c, $d, 12, 13, 14, 15), - } - }; - } - let r: i32x8 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1, 2, 3), - _ => shuffle1!(4, 5, 6, 7), - }; + let r: i32x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); transmute(r) } @@ -22076,21 +22007,16 @@ pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i, imm8: i32) -> __m256i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_i32x4&expand=5172) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_i32x4( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_i32x4::(a, b); transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) } @@ -22099,20 +22025,15 @@ pub unsafe fn _mm256_mask_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_i32x4&expand=5173) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_i32x4( k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_i32x4::(a, b); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) } @@ -47798,7 +47719,7 @@ mod tests { unsafe fn test_mm512_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_shuffle_i32x4(a, b, 0b0000); + let r = _mm512_shuffle_i32x4::<0b0000>(a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47807,9 +47728,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_mask_shuffle_i32x4(a, 0, a, b, 0b0000); + let r = _mm512_mask_shuffle_i32x4::<0b0000>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shuffle_i32x4(a, 
0b11111111_11111111, a, b, 0b0000); + let r = _mm512_mask_shuffle_i32x4::<0b0000>(a, 0b11111111_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47818,9 +47739,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_maskz_shuffle_i32x4(0, a, b, 0b0000); + let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_i32x4(0b00000000_11111111, a, b, 0b0000); + let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0b00000000_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -47829,7 +47750,7 @@ mod tests { unsafe fn test_mm256_shuffle_i32x4() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm256_shuffle_i32x4(a, b, 0b00); + let r = _mm256_shuffle_i32x4::<0b00>(a, b); let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); assert_eq_m256i(r, e); } @@ -47838,9 +47759,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_i32x4() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm256_mask_shuffle_i32x4(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shuffle_i32x4(a, 0b11111111, a, b, 0b00); + let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b); let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); assert_eq_m256i(r, e); } @@ -47849,9 +47770,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_i32x4() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm256_maskz_shuffle_i32x4(0, a, b, 0b00); + let r 
= _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shuffle_i32x4(0b11111111, a, b, 0b00); + let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b); let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); assert_eq_m256i(r, e); } From 2bef43e07032f0b2d6b5c2ea14e068676a856492 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 23:44:49 +0000 Subject: [PATCH 008/123] shuffle_f32x4 --- crates/core_arch/src/x86/avx512f.rs | 254 ++++++++++------------------ 1 file changed, 94 insertions(+), 160 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index cdc3f2d003..b5d49b8677 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21909,7 +21909,7 @@ pub unsafe fn _mm_maskz_shuffle_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i32&expand=5177) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10010101))] //should be vshufi32x4 +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4 #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i { static_assert_imm8!(MASK); @@ -21945,7 +21945,7 @@ pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i32x&expand=5175) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm512_mask_shuffle_i32x4( src: __m512i, @@ -21962,7 +21962,7 @@ pub unsafe fn _mm512_mask_shuffle_i32x4( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i32&expand=5176) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm512_maskz_shuffle_i32x4( k: __mmask16, @@ -21979,7 +21979,7 @@ pub unsafe fn _mm512_maskz_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_i32x4&expand=5174) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, MASK = 0b1001))] //should be vshufi32x4 +#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4 #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> __m256i { static_assert_imm8!(MASK); @@ -22007,7 +22007,7 @@ pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_i32x4&expand=5172) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm256_mask_shuffle_i32x4( src: __m256i, @@ -22025,7 +22025,7 @@ pub unsafe fn _mm256_mask_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_i32x4&expand=5173) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm256_maskz_shuffle_i32x4( k: __mmask8, @@ -22234,75 +22234,35 @@ pub unsafe fn _mm256_maskz_shuffle_i64x2( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_f32x4&expand=5165) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] //should be vshuff32x4, but generate vshuff64x2 -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr, - $i:expr, - $j:expr, - $k:expr, - $l:expr, - $m:expr, - $n:expr, - $o:expr, - $p:expr - ) => { - simd_shuffle16( - a, - b, - [ - $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, - ], - ) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27), - _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19), - 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23), - 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27), - _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr, $i: expr, $m: expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3), - 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7), - 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11), - _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 1, 2, 3), - 1 => shuffle1!(4, 5, 6, 7), - 2 => shuffle1!(8, 9, 10, 11), - _ => shuffle1!(12, 13, 14, 15), - } +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m512 { + static_assert_imm8!(MASK); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r: f32x16 = simd_shuffle16( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, + ], + ); + transmute(r) } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -22310,21 +22270,16 @@ pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_f32&expand=5163) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_f32x4( +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_f32x4( src: __m512, k: __mmask16, a: __m512, b: __m512, - imm8: i32, ) -> __m512 { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f32x4::(a, b); transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) } @@ -22333,15 +22288,15 @@ pub unsafe fn _mm512_mask_shuffle_f32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_f32&expand=5164) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_f32x4( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f32x4::(a, b); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) } @@ -22351,40 +22306,26 @@ pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_f32x4&expand=5162) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshuff32x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256, imm8: i32) -> __m256 { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m256 { + static_assert_imm8!(MASK); let a = a.as_f32x8(); let b = b.as_f32x8(); - macro_rules! shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $b:expr, $c: expr, $d: expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, $c, $d, 8, 9, 10, 11), - _ => shuffle2!($a, $b, $c, $d, 12, 13, 14, 15), - } - }; - } - let r: f32x8 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1, 2, 3), - _ => shuffle1!(4, 5, 6, 7), - }; - + let r: f32x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); transmute(r) } @@ -22393,21 +22334,15 @@ pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256, imm8: i32) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_f32x4&expand=5160) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_f32x4( +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_f32x4( src: __m256, k: __mmask8, a: __m256, b: __m256, - imm8: i32, ) -> __m256 { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_f32x4::(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -22416,15 +22351,14 @@ pub unsafe fn _mm256_mask_shuffle_f32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_f32x4&expand=5161) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_f32x4(k: __mmask8, a: __m256, b: __m256, imm8: i32) -> __m256 { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_f32x4( + k: __mmask8, + a: __m256, + b: __m256, +) -> __m256 { + let r = _mm256_shuffle_f32x4::(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) } @@ -47719,7 +47653,7 @@ mod tests { unsafe fn test_mm512_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_shuffle_i32x4::<0b0000>(a, b); + let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47728,9 +47662,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_mask_shuffle_i32x4::<0b0000>(a, 0, a, b); + let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512i(r, a); - let r = 
_mm512_mask_shuffle_i32x4::<0b0000>(a, 0b11111111_11111111, a, b); + let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47739,9 +47673,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0, a, b); + let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0b00000000_11111111, a, b); + let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -47785,7 +47719,7 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_shuffle_f32x4(a, b, 0b00000000); + let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b); let e = _mm512_setr_ps( 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7., ); @@ -47800,9 +47734,9 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_mask_shuffle_f32x4(a, 0, a, b, 0b00000000); + let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512(r, a); - let r = _mm512_mask_shuffle_f32x4(a, 0b11111111_11111111, a, b, 0b00000000); + let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b); let e = _mm512_setr_ps( 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7., ); @@ -47817,9 +47751,9 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_maskz_shuffle_f32x4(0, a, b, 0b00000000); + let r = 
_mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_shuffle_f32x4(0b00000000_11111111, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b); let e = _mm512_setr_ps( 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0., ); @@ -47830,7 +47764,7 @@ mod tests { unsafe fn test_mm256_shuffle_f32x4() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_shuffle_f32x4(a, b, 0b00); + let r = _mm256_shuffle_f32x4::<0b00>(a, b); let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); assert_eq_m256(r, e); } @@ -47839,9 +47773,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_f32x4() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_mask_shuffle_f32x4(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b); assert_eq_m256(r, a); - let r = _mm256_mask_shuffle_f32x4(a, 0b11111111, a, b, 0b00); + let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b); let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); assert_eq_m256(r, e); } @@ -47850,9 +47784,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_f32x4() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_maskz_shuffle_f32x4(0, a, b, 0b00); + let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b); assert_eq_m256(r, _mm256_setzero_ps()); - let r = _mm256_maskz_shuffle_f32x4(0b11111111, a, b, 0b00); + let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b); let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); assert_eq_m256(r, e); } From fa7c938465ae49b8479e959073be38b0496bb465 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 00:18:12 +0000 Subject: [PATCH 009/123] shuffle_i64x2 --- 
crates/core_arch/src/x86/avx512f.rs | 173 ++++++++----------------- crates/core_arch/src/x86_64/avx512f.rs | 20 +-- 2 files changed, 61 insertions(+), 132 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index b5d49b8677..c00dbaea21 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -22043,61 +22043,27 @@ pub unsafe fn _mm256_maskz_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i64x2&expand=5183) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13), - _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, 8, 9), - 1 => shuffle3!($a, $b, $e, $f, 10, 11), - 2 => shuffle3!($a, $b, $e, $f, 12, 13), - _ => shuffle3!($a, $b, $e, $f, 14, 15), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, 0, 1), - 1 => shuffle2!($a, $e, 2, 3), - 2 => shuffle2!($a, $e, 4, 5), - _ => shuffle2!($a, $e, 6, 7), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 1), - 1 => shuffle1!(2, 3), - 2 => shuffle1!(4, 5), - _ => shuffle1!(6, 7), - } +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(MASK); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r: i64x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22105,21 +22071,15 @@ pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i64x&expand=5181) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_i64x2( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) } @@ -22128,20 +22088,14 @@ pub unsafe fn _mm512_mask_shuffle_i64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i64&expand=5182) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_i64x2( k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i64x2::(a, b); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) } @@ -22151,35 +22105,22 @@ pub unsafe fn _mm512_maskz_shuffle_i64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_i64x2&expand=5180) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshufi64x2 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(MASK); let a = a.as_i64x4(); let b = b.as_i64x4(); - macro_rules! 
shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr - ) => { - simd_shuffle4(a, b, [$a, $b, $c, $d]) - }; - } - macro_rules! shuffle1 { - ($a:expr, $b:expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, 4, 5), - _ => shuffle2!($a, $b, 6, 7), - } - }; - } - let r: i64x4 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1), - _ => shuffle1!(2, 3), - }; + let r: i64x4 = simd_shuffle4( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); transmute(r) } @@ -22188,21 +22129,15 @@ pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i, imm8: i32) -> __m256i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_i64x2&expand=5178) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_i64x2( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) } @@ -22211,20 +22146,14 @@ pub unsafe fn _mm256_mask_shuffle_i64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_i64x2&expand=5179) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_i64x2( k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_i64x2::(a, b); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 2db8a430d4..6d816b86c3 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -9658,7 +9658,7 @@ mod tests { unsafe fn test_mm512_shuffle_i64x2() { let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_shuffle_i64x2(a, b, 0b00000000); + let r = _mm512_shuffle_i64x2::<0b00_00_00_00>(a, b); let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); assert_eq_m512i(r, e); } @@ -9667,9 +9667,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_i64x2() { let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_mask_shuffle_i64x2(a, 0, a, b, 0b00000000); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512i(r, 
a); - let r = _mm512_mask_shuffle_i64x2(a, 0b11111111, a, b, 0b00000000); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0b11111111, a, b); let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); assert_eq_m512i(r, e); } @@ -9678,9 +9678,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_i64x2() { let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_maskz_shuffle_i64x2(0, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_i64x2(0b00001111, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0b00001111, a, b); let e = _mm512_setr_epi64(1, 4, 1, 4, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -9689,7 +9689,7 @@ mod tests { unsafe fn test_mm256_shuffle_i64x2() { let a = _mm256_set_epi64x(1, 4, 5, 8); let b = _mm256_set_epi64x(2, 3, 6, 7); - let r = _mm256_shuffle_i64x2(a, b, 0b00); + let r = _mm256_shuffle_i64x2::<0b00>(a, b); let e = _mm256_set_epi64x(6, 7, 5, 8); assert_eq_m256i(r, e); } @@ -9698,9 +9698,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_i64x2() { let a = _mm256_set_epi64x(1, 4, 5, 8); let b = _mm256_set_epi64x(2, 3, 6, 7); - let r = _mm256_mask_shuffle_i64x2(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shuffle_i64x2(a, 0b00001111, a, b, 0b00); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0b00001111, a, b); let e = _mm256_set_epi64x(6, 7, 5, 8); assert_eq_m256i(r, e); } @@ -9709,9 +9709,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_i64x2() { let a = _mm256_set_epi64x(1, 4, 5, 8); let b = _mm256_set_epi64x(2, 3, 6, 7); - let r = _mm256_maskz_shuffle_i64x2(0, a, b, 0b00); + let r = _mm256_maskz_shuffle_i64x2::<0b00>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shuffle_i64x2(0b00001111, a, b, 0b00); + let r = 
_mm256_maskz_shuffle_i64x2::<0b00>(0b00001111, a, b); let e = _mm256_set_epi64x(6, 7, 5, 8); assert_eq_m256i(r, e); } From b8c1bd7cedfd211333f8eb02a55d737bae7261bf Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 00:41:22 +0000 Subject: [PATCH 010/123] shuffle_f64x2 --- crates/core_arch/src/x86/avx512f.rs | 197 ++++++++++--------------- crates/core_arch/src/x86_64/avx512f.rs | 20 +-- 2 files changed, 85 insertions(+), 132 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index c00dbaea21..5abe23e093 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21692,6 +21692,7 @@ pub unsafe fn _mm512_mask_shuffle_ps( a: __m512, b: __m512, ) -> __m512 { + static_assert_imm8!(MASK); let r = _mm512_shuffle_ps::(a, b); transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) } @@ -21708,6 +21709,7 @@ pub unsafe fn _mm512_maskz_shuffle_ps( a: __m512, b: __m512, ) -> __m512 { + static_assert_imm8!(MASK); let r = _mm512_shuffle_ps::(a, b); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) @@ -21726,6 +21728,7 @@ pub unsafe fn _mm256_mask_shuffle_ps( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_ps::(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -21742,6 +21745,7 @@ pub unsafe fn _mm256_maskz_shuffle_ps( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_ps::(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) @@ -21760,6 +21764,7 @@ pub unsafe fn _mm_mask_shuffle_ps( a: __m128, b: __m128, ) -> __m128 { + static_assert_imm8!(MASK); let r = _mm_shuffle_ps::(a, b); transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) } @@ -21772,6 +21777,7 @@ pub unsafe fn _mm_mask_shuffle_ps( #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] 
#[rustc_legacy_const_generics(3)] pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + static_assert_imm8!(MASK); let r = _mm_shuffle_ps::(a, b); let zero = _mm_setzero_ps().as_f32x4(); transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) @@ -21815,6 +21821,7 @@ pub unsafe fn _mm512_mask_shuffle_pd( a: __m512d, b: __m512d, ) -> __m512d { + static_assert_imm8!(MASK); let r = _mm512_shuffle_pd::(a, b); transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) } @@ -21831,6 +21838,7 @@ pub unsafe fn _mm512_maskz_shuffle_pd( a: __m512d, b: __m512d, ) -> __m512d { + static_assert_imm8!(MASK); let r = _mm512_shuffle_pd::(a, b); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) @@ -21849,6 +21857,7 @@ pub unsafe fn _mm256_mask_shuffle_pd( a: __m256d, b: __m256d, ) -> __m256d { + static_assert_imm8!(MASK); let r = _mm256_shuffle_pd::(a, b); transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) } @@ -21865,6 +21874,7 @@ pub unsafe fn _mm256_maskz_shuffle_pd( a: __m256d, b: __m256d, ) -> __m256d { + static_assert_imm8!(MASK); let r = _mm256_shuffle_pd::(a, b); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) @@ -21883,6 +21893,7 @@ pub unsafe fn _mm_mask_shuffle_pd( a: __m128d, b: __m128d, ) -> __m128d { + static_assert_imm8!(MASK); let r = _mm_shuffle_pd::(a, b); transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) } @@ -21899,6 +21910,7 @@ pub unsafe fn _mm_maskz_shuffle_pd( a: __m128d, b: __m128d, ) -> __m128d { + static_assert_imm8!(MASK); let r = _mm_shuffle_pd::(a, b); let zero = _mm_setzero_pd().as_f64x2(); transmute(simd_select_bitmask(k, r.as_f64x2(), zero)) @@ -21953,6 +21965,7 @@ pub unsafe fn _mm512_mask_shuffle_i32x4( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i32x4::(a, b); transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) } @@ -21969,6 +21982,7 
@@ pub unsafe fn _mm512_maskz_shuffle_i32x4( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i32x4::(a, b); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) @@ -22079,6 +22093,7 @@ pub unsafe fn _mm512_mask_shuffle_i64x2( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) } @@ -22095,6 +22110,7 @@ pub unsafe fn _mm512_maskz_shuffle_i64x2( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i64x2::(a, b); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) @@ -22137,6 +22153,7 @@ pub unsafe fn _mm256_mask_shuffle_i64x2( a: __m256i, b: __m256i, ) -> __m256i { + static_assert_imm8!(MASK); let r = _mm256_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) } @@ -22153,6 +22170,7 @@ pub unsafe fn _mm256_maskz_shuffle_i64x2( a: __m256i, b: __m256i, ) -> __m256i { + static_assert_imm8!(MASK); let r = _mm256_shuffle_i64x2::(a, b); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) @@ -22271,6 +22289,7 @@ pub unsafe fn _mm256_mask_shuffle_f32x4( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_f32x4::(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -22287,6 +22306,7 @@ pub unsafe fn _mm256_maskz_shuffle_f32x4( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_f32x4::(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) @@ -22297,61 +22317,27 @@ pub unsafe fn _mm256_maskz_shuffle_f32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_f64x2&expand=5171) #[inline] 
#[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13), - _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, 8, 9), - 1 => shuffle3!($a, $b, $e, $f, 10, 11), - 2 => shuffle3!($a, $b, $e, $f, 12, 13), - _ => shuffle3!($a, $b, $e, $f, 14, 15), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, 0, 1), - 1 => shuffle2!($a, $e, 2, 3), - 2 => shuffle2!($a, $e, 4, 5), - _ => shuffle2!($a, $e, 6, 7), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 1), - 1 => shuffle1!(2, 3), - 2 => shuffle1!(4, 5), - _ => shuffle1!(6, 7), - } +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> __m512d { + static_assert_imm8!(MASK); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r: f64x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22359,21 +22345,16 @@ pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_f64x2&expand=5169) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_f64x2( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, - imm8: i32, ) -> __m512d { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f64x2::(a, b); transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) } @@ -22382,20 +22363,15 @@ pub unsafe fn _mm512_mask_shuffle_f64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_f64x2&expand=5170) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_f64x2( k: __mmask8, a: __m512d, b: __m512d, - imm8: i32, ) -> __m512d { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f64x2::(a, b); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) } @@ -22405,35 +22381,22 @@ pub unsafe fn _mm512_maskz_shuffle_f64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_f64x2&expand=5168) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshuff64x2 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d, imm8: i32) -> __m256d { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> __m256d { + static_assert_imm8!(MASK); let a = a.as_f64x4(); let b = b.as_f64x4(); - macro_rules! 
shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr - ) => { - simd_shuffle4(a, b, [$a, $b, $c, $d]) - }; - } - macro_rules! shuffle1 { - ($a:expr, $b:expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, 4, 5), - _ => shuffle2!($a, $b, 6, 7), - } - }; - } - let r: f64x4 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1), - _ => shuffle1!(2, 3), - }; + let r: f64x4 = simd_shuffle4( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); transmute(r) } @@ -22442,21 +22405,16 @@ pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d, imm8: i32) -> __m256d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_f64x2&expand=5166) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_f64x2( src: __m256d, k: __mmask8, a: __m256d, b: __m256d, - imm8: i32, ) -> __m256d { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_f64x2::(a, b); transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) } @@ -22465,20 +22423,15 @@ pub unsafe fn _mm256_mask_shuffle_f64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_f64x2&expand=5167) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_f64x2( k: __mmask8, a: __m256d, b: __m256d, - imm8: i32, ) -> __m256d { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_f64x2::(a, b); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 6d816b86c3..9ad35f7166 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -9720,7 +9720,7 @@ mod tests { unsafe fn test_mm512_shuffle_f64x2() { let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm512_shuffle_f64x2(a, b, 0b00000000); + let r = _mm512_shuffle_f64x2::<0b00_00_00_00>(a, b); let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); assert_eq_m512d(r, e); } @@ -9729,9 +9729,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_f64x2() { let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm512_mask_shuffle_f64x2(a, 0, a, b, 0b00000000); + 
let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512d(r, a); - let r = _mm512_mask_shuffle_f64x2(a, 0b11111111, a, b, 0b00000000); + let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0b11111111, a, b); let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); assert_eq_m512d(r, e); } @@ -9740,9 +9740,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_f64x2() { let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm512_maskz_shuffle_f64x2(0, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0, a, b); assert_eq_m512d(r, _mm512_setzero_pd()); - let r = _mm512_maskz_shuffle_f64x2(0b00001111, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0b00001111, a, b); let e = _mm512_setr_pd(1., 4., 1., 4., 0., 0., 0., 0.); assert_eq_m512d(r, e); } @@ -9751,7 +9751,7 @@ mod tests { unsafe fn test_mm256_shuffle_f64x2() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_shuffle_f64x2(a, b, 0b00); + let r = _mm256_shuffle_f64x2::<0b00>(a, b); let e = _mm256_set_pd(6., 7., 5., 8.); assert_eq_m256d(r, e); } @@ -9760,9 +9760,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_f64x2() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_mask_shuffle_f64x2(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0, a, b); assert_eq_m256d(r, a); - let r = _mm256_mask_shuffle_f64x2(a, 0b00001111, a, b, 0b00); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0b00001111, a, b); let e = _mm256_set_pd(6., 7., 5., 8.); assert_eq_m256d(r, e); } @@ -9771,9 +9771,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_f64x2() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_maskz_shuffle_f64x2(0, a, b, 0b00); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0, a, b); assert_eq_m256d(r, _mm256_setzero_pd()); - let 
r = _mm256_maskz_shuffle_f64x2(0b00001111, a, b, 0b00); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0b00001111, a, b); let e = _mm256_set_pd(6., 7., 5., 8.); assert_eq_m256d(r, e); } From bc0e28b24eece7c8571585a26d068242c054026c Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 11:33:56 +0000 Subject: [PATCH 011/123] move x86/constify_imm4 macro --- crates/core_arch/src/x86/macros.rs | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index 1c02de24a7..bf734974af 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -48,31 +48,6 @@ macro_rules! static_assert_imm8u { }; } -#[allow(unused_macros)] -macro_rules! constify_imm4 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - 3 => $expand!(3), - 4 => $expand!(4), - 5 => $expand!(5), - 6 => $expand!(6), - 7 => $expand!(7), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - 12 => $expand!(12), - 13 => $expand!(13), - 14 => $expand!(14), - _ => $expand!(15), - } - }; -} - macro_rules! 
constify_imm3 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] From 3239cea7982663fea2606387e51952f07efe0c97 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 14:21:46 +0000 Subject: [PATCH 012/123] mm_cvtt_roundss,sd_u64,i64,si64; mm_cvt_roundss,sd_u64,i64,si64; mm_cvt_roundu64,i64,si64_ss,sd --- crates/core_arch/src/x86_64/avx512f.rs | 270 +++++++++---------------- 1 file changed, 90 insertions(+), 180 deletions(-) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 9ad35f7166..43906f7714 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -145,16 +145,11 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=1313) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2sd64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2sd64(a, b, ROUNDING); transmute(r) } @@ -169,16 +164,11 @@ pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=1367) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> __m128d { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsi2sd64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2sd64(a, b, ROUNDING); transmute(r) } @@ -193,16 +183,11 @@ pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=1314) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m128 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2ss64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) } @@ -217,16 +202,11 @@ pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=1379) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtusi2sd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __m128d { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtusi2sd64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtusi2sd64(a, b, ROUNDING); transmute(r) } @@ -241,16 +221,11 @@ pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=1368) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __m128 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2ss64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) } @@ -265,16 +240,11 @@ pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=1380) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m128 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtusi2ss64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtusi2ss64(a, b, ROUNDING); transmute(r) } @@ -289,16 +259,11 @@ pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=1360) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si64(a, ROUNDING); transmute(r) } @@ -313,16 +278,11 @@ pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=1358) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si64(a, ROUNDING); transmute(r) } @@ -337,16 +297,11 @@ pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=1365) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d, rounding: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2usi64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2usi64(a, ROUNDING); transmute(r) } @@ -361,16 +316,11 @@ pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d, rounding: i32) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=1375) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_si64(a: __m128, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si64(a, ROUNDING); transmute(r) } @@ -385,16 +335,11 @@ pub unsafe fn _mm_cvt_roundss_si64(a: __m128, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=1370) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_i64(a: __m128, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si64(a, ROUNDING); transmute(r) } @@ -409,16 +354,11 @@ pub unsafe fn _mm_cvt_roundss_i64(a: __m128, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=1377) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_u64(a: __m128, rounding: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2usi64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2usi64(a, ROUNDING); transmute(r) } @@ -428,16 +368,11 @@ pub unsafe fn _mm_cvt_roundss_u64(a: __m128, rounding: i32) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=1931) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si64(a, SAE); transmute(r) } @@ -447,16 +382,11 @@ pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=1929) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si64(a, SAE); transmute(r) } @@ -466,16 +396,11 @@ pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=1933) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d, sae: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2usi64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2usi64(a, SAE); transmute(r) } @@ -485,16 +410,11 @@ pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d, sae: i32) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=1935) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_i64(a: __m128, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si64(a, SAE); transmute(r) } @@ -504,16 +424,11 @@ pub unsafe fn _mm_cvtt_roundss_i64(a: __m128, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=1937) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_si64(a: __m128, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si64(a, SAE); transmute(r) } @@ -523,16 +438,11 @@ pub unsafe fn _mm_cvtt_roundss_si64(a: __m128, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=1939) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_u64(a: __m128, sae: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2usi64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2usi64(a, SAE); transmute(r) } @@ -12197,7 +12107,7 @@ mod tests { unsafe fn test_mm_cvt_roundi64_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -12206,7 +12116,7 @@ mod tests { unsafe fn test_mm_cvt_roundsi64_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundsi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -12232,7 +12142,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_si64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12240,7 +12150,7 @@ mod tests { #[simd_test(enable = 
"avx512f")] unsafe fn test_mm_cvt_roundsd_i64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12248,7 +12158,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_u64() { let a = _mm_set_pd(1., f64::MAX); - let r = _mm_cvt_roundsd_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12264,7 +12174,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_i64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12272,7 +12182,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_si64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12280,7 +12190,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_u64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12304,7 +12214,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_i64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_i64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_i64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12312,7 +12222,7 @@ mod tests { #[simd_test(enable = "avx512f")] 
unsafe fn test_mm_cvtt_roundsd_si64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_si64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_si64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12320,7 +12230,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_u64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_u64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_u64::<_MM_FROUND_CUR_DIRECTION>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12344,7 +12254,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_i64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_i64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_i64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12352,7 +12262,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_si64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_si64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_si64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12360,7 +12270,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_u64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_u64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_u64::<_MM_FROUND_CUR_DIRECTION>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12395,7 +12305,7 @@ mod tests { unsafe fn test_mm_cvt_roundu64_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: u64 = 9; - let r = _mm_cvt_roundu64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundu64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -12404,7 +12314,7 @@ mod tests { unsafe fn test_mm_cvt_roundu64_sd() { let a = _mm_set_pd(1., -1.5); let b: u64 = 9; - let r = _mm_cvt_roundu64_sd(a, b, 
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundu64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 9.); assert_eq_m128d(r, e); } @@ -12413,7 +12323,7 @@ mod tests { unsafe fn test_mm_cvt_roundi64_sd() { let a = _mm_set_pd(1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 9.); assert_eq_m128d(r, e); } @@ -12422,7 +12332,7 @@ mod tests { unsafe fn test_mm_cvt_roundsi64_sd() { let a = _mm_set_pd(1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundsi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 9.); assert_eq_m128d(r, e); } From 5a7e50ac6e7c91998eaf52e1bff7956e596e9d93 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 14:41:58 +0000 Subject: [PATCH 013/123] add static_assert --- crates/core_arch/src/x86_64/avx512f.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 43906f7714..af62b2112c 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -148,6 +148,7 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { #[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsi2sd64(a, b, ROUNDING); transmute(r) @@ -167,6 +168,7 @@ pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __ #[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsi2sd64(a, b, 
ROUNDING); transmute(r) @@ -186,6 +188,7 @@ pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> _ #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) @@ -205,6 +208,7 @@ pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m #[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtusi2sd64(a, b, ROUNDING); transmute(r) @@ -224,6 +228,7 @@ pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __ #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) @@ -243,6 +248,7 @@ pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __ #[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtusi2ss64(a, b, ROUNDING); transmute(r) @@ -262,6 +268,7 @@ pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsd2si64(a, ROUNDING); transmute(r) @@ -281,6 +288,7 @@ pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { + 
static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsd2si64(a, ROUNDING); transmute(r) @@ -300,6 +308,7 @@ pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsd2usi64(a, ROUNDING); transmute(r) @@ -319,6 +328,7 @@ pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtss2si64(a, ROUNDING); transmute(r) @@ -338,6 +348,7 @@ pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtss2si64(a, ROUNDING); transmute(r) @@ -357,6 +368,7 @@ pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtss2usi64(a, ROUNDING); transmute(r) @@ -371,6 +383,7 @@ pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { #[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { + static_assert_sae!(SAE); let a = a.as_f64x2(); let r = vcvtsd2si64(a, SAE); transmute(r) @@ -385,6 +398,7 @@ pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { + static_assert_sae!(SAE); let a = a.as_f64x2(); let r = 
vcvtsd2si64(a, SAE); transmute(r) @@ -399,6 +413,7 @@ pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { + static_assert_sae!(SAE); let a = a.as_f64x2(); let r = vcvtsd2usi64(a, SAE); transmute(r) @@ -413,6 +428,7 @@ pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { #[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let r = vcvtss2si64(a, SAE); transmute(r) @@ -427,6 +443,7 @@ pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let r = vcvtss2si64(a, SAE); transmute(r) @@ -441,6 +458,7 @@ pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let r = vcvtss2usi64(a, SAE); transmute(r) From 18b98104c785e9fd56bec2e6203c733f42eddc28 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 15:00:58 +0000 Subject: [PATCH 014/123] fix x86_64/macro --- crates/core_arch/src/x86_64/macros.rs | 47 ++++++++++++++------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/crates/core_arch/src/x86_64/macros.rs b/crates/core_arch/src/x86_64/macros.rs index e3682d40fe..cafa37dd6f 100644 --- a/crates/core_arch/src/x86_64/macros.rs +++ b/crates/core_arch/src/x86_64/macros.rs @@ -1,32 +1,33 @@ //! Utility macros. -// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11. -// This macro enforces that. 
+// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is +// not a round number. +pub(crate) struct ValidateConstRound; +impl ValidateConstRound { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11) as usize); + }; +} + #[allow(unused)] -macro_rules! constify_imm4_round { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - _ => panic!("Invalid round value"), - } +macro_rules! static_assert_rounding { + ($imm:ident) => { + let _ = $crate::core_arch::x86_64::macros::ValidateConstRound::<$imm>::VALID; + }; +} + +// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is +// not a sae number. +pub(crate) struct ValidateConstSae; +impl ValidateConstSae { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM == 4 || IMM == 8) as usize); }; } -// For sae instructions, the only valid values for sae are 4 and 8. -// This macro enforces that. #[allow(unused)] -macro_rules! constify_imm4_sae { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - _ => panic!("Invalid sae value"), - } +macro_rules! 
static_assert_sae { + ($imm:ident) => { + let _ = $crate::core_arch::x86_64::macros::ValidateConstSae::<$imm>::VALID; }; } From fabb653dde886527a729d784b06aa814758c94ac Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 15:31:00 +0000 Subject: [PATCH 015/123] remove x86/macro imm4_sae,imm4_rounding --- crates/core_arch/src/x86/macros.rs | 31 ------------------------------ 1 file changed, 31 deletions(-) diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index bf734974af..76b87b40f4 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -200,37 +200,6 @@ macro_rules! constify_imm8_gather { }; } -// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11. -// This macro enforces that. -#[allow(unused)] -macro_rules! constify_imm4_round { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - _ => panic!("Invalid round value"), - } - }; -} - -// For sae instructions, the only valid values for sae are 4 and 8. -// This macro enforces that. -#[allow(unused)] -macro_rules! constify_imm4_sae { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - _ => panic!("Invalid sae value"), - } - }; -} - // Two mantissas parameters. // This macro enforces that. 
#[allow(unused)] From 0ce558fe0cc88e4eff30833dfee937fe80393905 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 16:56:42 +0000 Subject: [PATCH 016/123] shldi,shrdi_epi64,epi32,epi16 --- crates/core_arch/src/x86/avx512vbmi2.rs | 865 +++++++++++++----------- 1 file changed, 468 insertions(+), 397 deletions(-) diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs index 032bce9176..b7a385dd97 100644 --- a/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/crates/core_arch/src/x86/avx512vbmi2.rs @@ -920,14 +920,15 @@ pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m1 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi64&expand=5060) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshldvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), )) } @@ -936,20 +937,20 @@ pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi64&expand=5058) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shldi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shldi_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { 
- assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshldvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -959,14 +960,19 @@ pub unsafe fn _mm512_mask_shldi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi64&expand=5059) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shldi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shldi_epi64( + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshldvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -977,14 +983,15 @@ pub unsafe fn _mm512_maskz_shldi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi64&expand=5057) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshldvq256( a.as_i64x4(), b.as_i64x4(), - 
_mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), )) } @@ -993,20 +1000,20 @@ pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi64&expand=5055) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shldi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shldi_epi64( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshldvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) } @@ -1016,14 +1023,19 @@ pub unsafe fn _mm256_mask_shldi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi64&expand=5056) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shldi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shldi_epi64( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshldvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1034,14 +1046,15 @@ pub 
unsafe fn _mm256_maskz_shldi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi64&expand=5054) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshldvq128( a.as_i64x2(), b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), + _mm_set1_epi64x(imm8).as_i64x2(), )) } @@ -1050,21 +1063,17 @@ pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi64&expand=5052) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shldi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shldi_epi64( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshldvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) } @@ -1073,15 +1082,16 @@ pub unsafe fn _mm_mask_shldi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi64&expand=5053) #[inline] 
#[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shldi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshldvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shldi_epi64( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); let zero = _mm_setzero_si128().as_i64x2(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1091,14 +1101,14 @@ pub unsafe fn _mm_maskz_shldi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi32&expand=5051) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); transmute(vpshldvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), )) } @@ -1107,20 +1117,19 @@ pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi32&expand=5049) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe 
fn _mm512_mask_shldi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shldi_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x16 = vpshldvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1130,14 +1139,18 @@ pub unsafe fn _mm512_mask_shldi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi32&expand=5050) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shldi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shldi_epi32( + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let shf: i32x16 = vpshldvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1148,14 +1161,14 @@ pub unsafe fn _mm512_maskz_shldi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi32&expand=5048) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] 
+#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); transmute(vpshldvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), )) } @@ -1164,20 +1177,19 @@ pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi32&expand=5046) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shldi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shldi_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x8 = vpshldvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) } @@ -1187,14 +1199,18 @@ pub unsafe fn _mm256_mask_shldi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi32&expand=5047) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shldi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shldi_epi32( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let shf: i32x8 = vpshldvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); let zero = 
_mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1205,14 +1221,14 @@ pub unsafe fn _mm256_maskz_shldi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi32&expand=5045) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); transmute(vpshldvd128( a.as_i32x4(), b.as_i32x4(), - _mm_set1_epi32(imm8).as_i32x4(), + _mm_set1_epi32(IMM8).as_i32x4(), )) } @@ -1221,17 +1237,16 @@ pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi32&expand=5043) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shldi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shldi_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) } @@ -1240,11 +1255,15 @@ pub unsafe fn _mm_mask_shldi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi32&expand=5044) 
#[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shldi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shldi_epi32( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1254,14 +1273,15 @@ pub unsafe fn _mm_maskz_shldi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi16&expand=5042) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshldvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), )) } @@ -1270,20 +1290,20 @@ pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi16&expand=5040) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn 
_mm512_mask_shldi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shldi_epi16( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x32 = vpshldvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -1293,14 +1313,19 @@ pub unsafe fn _mm512_mask_shldi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi16&expand=5041) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shldi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shldi_epi16( + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x32 = vpshldvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1311,14 +1336,15 @@ pub unsafe fn _mm512_maskz_shldi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi16&expand=5039) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, 
assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshldvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), )) } @@ -1327,20 +1353,20 @@ pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi16&expand=5037) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shldi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shldi_epi16( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x16 = vpshldvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) } @@ -1350,13 +1376,19 @@ pub unsafe fn _mm256_mask_shldi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi16&expand=5038) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shldi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shldi_epi16( + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x16 = vpshldvw256( a.as_i16x16(), 
b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1367,13 +1399,15 @@ pub unsafe fn _mm256_maskz_shldi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi16&expand=5036) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshldvw128( a.as_i16x8(), b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), + _mm_set1_epi16(imm8).as_i16x8(), )) } @@ -1382,20 +1416,17 @@ pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi16&expand=5034) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shldi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shldi_epi16( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - let shf: i16x8 = vpshldvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) } @@ -1404,14 +1435,16 @@ pub unsafe fn _mm_mask_shldi_epi16( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi16&expand=5035) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shldi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - let shf: i16x8 = vpshldvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shldi_epi16( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1421,14 +1454,15 @@ pub unsafe fn _mm_maskz_shldi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi64&expand=5114) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshrdvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), )) } @@ -1437,20 +1471,20 @@ pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi64&expand=5112) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shrdi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shrdi_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshrdvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1460,14 +1494,19 @@ pub unsafe fn _mm512_mask_shrdi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi64&expand=5113) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 255))] //should be vpshrdq -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shrdi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shrdi_epi64( + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshrdvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1478,14 +1517,15 @@ pub unsafe fn _mm512_maskz_shrdi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8 /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi64&expand=5111) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshrdvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), )) } @@ -1494,20 +1534,20 @@ pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi64&expand=5109) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shrdi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shrdi_epi64( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshrdvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) } @@ -1517,14 +1557,19 @@ pub unsafe fn _mm256_mask_shrdi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi64&expand=5110) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] 
-#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shrdi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shrdi_epi64( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshrdvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1535,14 +1580,15 @@ pub unsafe fn _mm256_maskz_shrdi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi64&expand=5108) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshrdvq128( a.as_i64x2(), b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), + _mm_set1_epi64x(imm8).as_i64x2(), )) } @@ -1551,21 +1597,17 @@ pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi64&expand=5106) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq 
-#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shrdi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shrdi_epi64( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshrdvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) } @@ -1574,15 +1616,16 @@ pub unsafe fn _mm_mask_shrdi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi64&expand=5107) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shrdi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshrdvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shrdi_epi64( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); let zero = _mm_setzero_si128().as_i64x2(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1592,14 +1635,14 @@ pub unsafe fn _mm_maskz_shrdi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi32&expand=5105) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, 
assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); transmute(vpshrdvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), )) } @@ -1608,20 +1651,19 @@ pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi32&expand=5103) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shrdi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shrdi_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x16 = vpshrdvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1631,14 +1673,18 @@ pub unsafe fn _mm512_mask_shrdi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi32&expand=5104) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shrdi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, 
assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shrdi_epi32( + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let shf: i32x16 = vpshrdvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1649,14 +1695,14 @@ pub unsafe fn _mm512_maskz_shrdi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi32&expand=5102) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); transmute(vpshrdvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), )) } @@ -1665,20 +1711,19 @@ pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi32&expand=5100) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shrdi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shrdi_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && 
imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x8 = vpshrdvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) } @@ -1688,14 +1733,18 @@ pub unsafe fn _mm256_mask_shrdi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi32&expand=5101) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shrdi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shrdi_epi32( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let shf: i32x8 = vpshrdvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1706,14 +1755,14 @@ pub unsafe fn _mm256_maskz_shrdi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi32&expand=5099) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); transmute(vpshrdvd128( a.as_i32x4(), b.as_i32x4(), - 
_mm_set1_epi32(imm8).as_i32x4(), + _mm_set1_epi32(IMM8).as_i32x4(), )) } @@ -1722,17 +1771,16 @@ pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi32&expand=5097) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shrdi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shrdi_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) } @@ -1741,11 +1789,15 @@ pub unsafe fn _mm_mask_shrdi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi32&expand=5098) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shrdi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shrdi_epi32( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); let zero = _mm_setzero_si128().as_i32x4(); 
transmute(simd_select_bitmask(k, shf, zero)) } @@ -1755,14 +1807,16 @@ pub unsafe fn _mm_maskz_shrdi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi16&expand=5096) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); transmute(vpshrdvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), )) } @@ -1771,20 +1825,21 @@ pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi16&expand=5094) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shrdi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shrdi_epi16( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); let shf: i16x32 = vpshrdvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -1794,14 +1849,20 @@ pub unsafe fn _mm512_mask_shrdi_epi16( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi16&expand=5095) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shrdi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shrdi_epi16( + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); let shf: i16x32 = vpshrdvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1812,14 +1873,16 @@ pub unsafe fn _mm512_maskz_shrdi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi16&expand=5093) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); transmute(vpshrdvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), )) } @@ -1828,20 +1891,21 @@ pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi16&expand=5091) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shrdi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shrdi_epi16( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); let shf: i16x16 = vpshrdvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) } @@ -1851,13 +1915,19 @@ pub unsafe fn _mm256_mask_shrdi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi16&expand=5092) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shrdi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shrdi_epi16( + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x16 = vpshrdvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1868,13 +1938,15 @@ pub unsafe fn _mm256_maskz_shrdi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi16&expand=5090) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshrdvw128( a.as_i16x8(), b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), + _mm_set1_epi16(imm8).as_i16x8(), )) } @@ -1883,20 +1955,17 @@ pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi16&expand=5088) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shrdi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shrdi_epi16( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - let shf: i16x8 = vpshrdvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) } @@ -1905,14 +1974,16 @@ pub unsafe fn _mm_mask_shrdi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi16&expand=5089) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] 
//should be vpshrdw -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shrdi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - let shf: i16x8 = vpshrdvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shrdi_epi16( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -2921,7 +2992,7 @@ mod tests { unsafe fn test_mm512_shldi_epi64() { let a = _mm512_set1_epi64(1); let b = _mm512_set1_epi64(1 << 63); - let r = _mm512_shldi_epi64(a, b, 2); + let r = _mm512_shldi_epi64::<2>(a, b); let e = _mm512_set1_epi64(6); assert_eq_m512i(r, e); } @@ -2930,9 +3001,9 @@ mod tests { unsafe fn test_mm512_mask_shldi_epi64() { let a = _mm512_set1_epi64(1); let b = _mm512_set1_epi64(1 << 63); - let r = _mm512_mask_shldi_epi64(a, 0, a, b, 2); + let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shldi_epi64(a, 0b11111111, a, b, 2); + let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b); let e = _mm512_set1_epi64(6); assert_eq_m512i(r, e); } @@ -2941,9 +3012,9 @@ mod tests { unsafe fn test_mm512_maskz_shldi_epi64() { let a = _mm512_set1_epi64(1); let b = _mm512_set1_epi64(1 << 63); - let r = _mm512_maskz_shldi_epi64(0, a, b, 2); + let r = _mm512_maskz_shldi_epi64::<2>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shldi_epi64(0b11111111, a, b, 2); + let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b); let e = _mm512_set1_epi64(6); assert_eq_m512i(r, e); } @@ -2952,7 +3023,7 @@ mod tests { unsafe fn test_mm256_shldi_epi64() { let a = _mm256_set1_epi64x(1); let b = 
_mm256_set1_epi64x(1 << 63); - let r = _mm256_shldi_epi64(a, b, 2); + let r = _mm256_shldi_epi64::<2>(a, b); let e = _mm256_set1_epi64x(6); assert_eq_m256i(r, e); } @@ -2961,9 +3032,9 @@ mod tests { unsafe fn test_mm256_mask_shldi_epi64() { let a = _mm256_set1_epi64x(1); let b = _mm256_set1_epi64x(1 << 63); - let r = _mm256_mask_shldi_epi64(a, 0, a, b, 2); + let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shldi_epi64(a, 0b00001111, a, b, 2); + let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b); let e = _mm256_set1_epi64x(6); assert_eq_m256i(r, e); } @@ -2972,9 +3043,9 @@ mod tests { unsafe fn test_mm256_maskz_shldi_epi64() { let a = _mm256_set1_epi64x(1); let b = _mm256_set1_epi64x(1 << 63); - let r = _mm256_maskz_shldi_epi64(0, a, b, 2); + let r = _mm256_maskz_shldi_epi64::<2>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shldi_epi64(0b00001111, a, b, 2); + let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b); let e = _mm256_set1_epi64x(6); assert_eq_m256i(r, e); } @@ -2983,7 +3054,7 @@ mod tests { unsafe fn test_mm_shldi_epi64() { let a = _mm_set1_epi64x(1); let b = _mm_set1_epi64x(1 << 63); - let r = _mm_shldi_epi64(a, b, 2); + let r = _mm_shldi_epi64::<2>(a, b); let e = _mm_set1_epi64x(6); assert_eq_m128i(r, e); } @@ -2992,9 +3063,9 @@ mod tests { unsafe fn test_mm_mask_shldi_epi64() { let a = _mm_set1_epi64x(1); let b = _mm_set1_epi64x(1 << 63); - let r = _mm_mask_shldi_epi64(a, 0, a, b, 2); + let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shldi_epi64(a, 0b00000011, a, b, 2); + let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b); let e = _mm_set1_epi64x(6); assert_eq_m128i(r, e); } @@ -3003,9 +3074,9 @@ mod tests { unsafe fn test_mm_maskz_shldi_epi64() { let a = _mm_set1_epi64x(1); let b = _mm_set1_epi64x(1 << 63); - let r = _mm_maskz_shldi_epi64(0, a, b, 2); + let r = _mm_maskz_shldi_epi64::<2>(0, a, b); assert_eq_m128i(r, 
_mm_setzero_si128()); - let r = _mm_maskz_shldi_epi64(0b00000011, a, b, 2); + let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b); let e = _mm_set1_epi64x(6); assert_eq_m128i(r, e); } @@ -3014,7 +3085,7 @@ mod tests { unsafe fn test_mm512_shldi_epi32() { let a = _mm512_set1_epi32(1); let b = _mm512_set1_epi32(1 << 31); - let r = _mm512_shldi_epi32(a, b, 2); + let r = _mm512_shldi_epi32::<2>(a, b); let e = _mm512_set1_epi32(6); assert_eq_m512i(r, e); } @@ -3023,9 +3094,9 @@ mod tests { unsafe fn test_mm512_mask_shldi_epi32() { let a = _mm512_set1_epi32(1); let b = _mm512_set1_epi32(1 << 31); - let r = _mm512_mask_shldi_epi32(a, 0, a, b, 2); + let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shldi_epi32(a, 0b11111111_11111111, a, b, 2); + let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b); let e = _mm512_set1_epi32(6); assert_eq_m512i(r, e); } @@ -3034,9 +3105,9 @@ mod tests { unsafe fn test_mm512_maskz_shldi_epi32() { let a = _mm512_set1_epi32(1); let b = _mm512_set1_epi32(1 << 31); - let r = _mm512_maskz_shldi_epi32(0, a, b, 2); + let r = _mm512_maskz_shldi_epi32::<2>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shldi_epi32(0b11111111_11111111, a, b, 2); + let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b); let e = _mm512_set1_epi32(6); assert_eq_m512i(r, e); } @@ -3045,7 +3116,7 @@ mod tests { unsafe fn test_mm256_shldi_epi32() { let a = _mm256_set1_epi32(1); let b = _mm256_set1_epi32(1 << 31); - let r = _mm256_shldi_epi32(a, b, 2); + let r = _mm256_shldi_epi32::<2>(a, b); let e = _mm256_set1_epi32(6); assert_eq_m256i(r, e); } @@ -3054,9 +3125,9 @@ mod tests { unsafe fn test_mm256_mask_shldi_epi32() { let a = _mm256_set1_epi32(1); let b = _mm256_set1_epi32(1 << 31); - let r = _mm256_mask_shldi_epi32(a, 0, a, b, 2); + let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shldi_epi32(a, 0b11111111, a, b, 2); + let r = 
_mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b); let e = _mm256_set1_epi32(6); assert_eq_m256i(r, e); } @@ -3065,9 +3136,9 @@ mod tests { unsafe fn test_mm256_maskz_shldi_epi32() { let a = _mm256_set1_epi32(1); let b = _mm256_set1_epi32(1 << 31); - let r = _mm256_maskz_shldi_epi32(0, a, b, 2); + let r = _mm256_maskz_shldi_epi32::<2>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shldi_epi32(0b11111111, a, b, 2); + let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b); let e = _mm256_set1_epi32(6); assert_eq_m256i(r, e); } @@ -3076,7 +3147,7 @@ mod tests { unsafe fn test_mm_shldi_epi32() { let a = _mm_set1_epi32(1); let b = _mm_set1_epi32(1 << 31); - let r = _mm_shldi_epi32(a, b, 2); + let r = _mm_shldi_epi32::<2>(a, b); let e = _mm_set1_epi32(6); assert_eq_m128i(r, e); } @@ -3085,9 +3156,9 @@ mod tests { unsafe fn test_mm_mask_shldi_epi32() { let a = _mm_set1_epi32(1); let b = _mm_set1_epi32(1 << 31); - let r = _mm_mask_shldi_epi32(a, 0, a, b, 2); + let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shldi_epi32(a, 0b00001111, a, b, 2); + let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b); let e = _mm_set1_epi32(6); assert_eq_m128i(r, e); } @@ -3096,9 +3167,9 @@ mod tests { unsafe fn test_mm_maskz_shldi_epi32() { let a = _mm_set1_epi32(1); let b = _mm_set1_epi32(1 << 31); - let r = _mm_maskz_shldi_epi32(0, a, b, 2); + let r = _mm_maskz_shldi_epi32::<2>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shldi_epi32(0b00001111, a, b, 2); + let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b); let e = _mm_set1_epi32(6); assert_eq_m128i(r, e); } @@ -3107,7 +3178,7 @@ mod tests { unsafe fn test_mm512_shldi_epi16() { let a = _mm512_set1_epi16(1); let b = _mm512_set1_epi16(1 << 15); - let r = _mm512_shldi_epi16(a, b, 2); + let r = _mm512_shldi_epi16::<2>(a, b); let e = _mm512_set1_epi16(6); assert_eq_m512i(r, e); } @@ -3116,9 +3187,9 @@ mod tests { unsafe fn 
test_mm512_mask_shldi_epi16() { let a = _mm512_set1_epi16(1); let b = _mm512_set1_epi16(1 << 15); - let r = _mm512_mask_shldi_epi16(a, 0, a, b, 2); + let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shldi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b, 2); + let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(6); assert_eq_m512i(r, e); } @@ -3127,9 +3198,9 @@ mod tests { unsafe fn test_mm512_maskz_shldi_epi16() { let a = _mm512_set1_epi16(1); let b = _mm512_set1_epi16(1 << 15); - let r = _mm512_maskz_shldi_epi16(0, a, b, 2); + let r = _mm512_maskz_shldi_epi16::<2>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shldi_epi16(0b11111111_11111111_11111111_11111111, a, b, 2); + let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(6); assert_eq_m512i(r, e); } @@ -3138,7 +3209,7 @@ mod tests { unsafe fn test_mm256_shldi_epi16() { let a = _mm256_set1_epi16(1); let b = _mm256_set1_epi16(1 << 15); - let r = _mm256_shldi_epi16(a, b, 2); + let r = _mm256_shldi_epi16::<2>(a, b); let e = _mm256_set1_epi16(6); assert_eq_m256i(r, e); } @@ -3147,9 +3218,9 @@ mod tests { unsafe fn test_mm256_mask_shldi_epi16() { let a = _mm256_set1_epi16(1); let b = _mm256_set1_epi16(1 << 15); - let r = _mm256_mask_shldi_epi16(a, 0, a, b, 2); + let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shldi_epi16(a, 0b11111111_11111111, a, b, 2); + let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b); let e = _mm256_set1_epi16(6); assert_eq_m256i(r, e); } @@ -3158,9 +3229,9 @@ mod tests { unsafe fn test_mm256_maskz_shldi_epi16() { let a = _mm256_set1_epi16(1); let b = _mm256_set1_epi16(1 << 15); - let r = _mm256_maskz_shldi_epi16(0, a, b, 2); + let r = _mm256_maskz_shldi_epi16::<2>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = 
_mm256_maskz_shldi_epi16(0b11111111_11111111, a, b, 2); + let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b); let e = _mm256_set1_epi16(6); assert_eq_m256i(r, e); } @@ -3169,7 +3240,7 @@ mod tests { unsafe fn test_mm_shldi_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(1 << 15); - let r = _mm_shldi_epi16(a, b, 2); + let r = _mm_shldi_epi16::<2>(a, b); let e = _mm_set1_epi16(6); assert_eq_m128i(r, e); } @@ -3178,9 +3249,9 @@ mod tests { unsafe fn test_mm_mask_shldi_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(1 << 15); - let r = _mm_mask_shldi_epi16(a, 0, a, b, 2); + let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shldi_epi16(a, 0b11111111, a, b, 2); + let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b); let e = _mm_set1_epi16(6); assert_eq_m128i(r, e); } @@ -3189,9 +3260,9 @@ mod tests { unsafe fn test_mm_maskz_shldi_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(1 << 15); - let r = _mm_maskz_shldi_epi16(0, a, b, 2); + let r = _mm_maskz_shldi_epi16::<2>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shldi_epi16(0b11111111, a, b, 2); + let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b); let e = _mm_set1_epi16(6); assert_eq_m128i(r, e); } @@ -3200,7 +3271,7 @@ mod tests { unsafe fn test_mm512_shrdi_epi64() { let a = _mm512_set1_epi64(8); let b = _mm512_set1_epi64(2); - let r = _mm512_shrdi_epi64(a, b, 1); + let r = _mm512_shrdi_epi64::<1>(a, b); let e = _mm512_set1_epi64(1); assert_eq_m512i(r, e); } @@ -3209,9 +3280,9 @@ mod tests { unsafe fn test_mm512_mask_shrdi_epi64() { let a = _mm512_set1_epi64(8); let b = _mm512_set1_epi64(2); - let r = _mm512_mask_shrdi_epi64(a, 0, a, b, 1); + let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shrdi_epi64(a, 0b11111111, a, b, 1); + let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b); let e = _mm512_set1_epi64(1); assert_eq_m512i(r, e); } @@ -3220,9 
+3291,9 @@ mod tests { unsafe fn test_mm512_maskz_shrdi_epi64() { let a = _mm512_set1_epi64(8); let b = _mm512_set1_epi64(2); - let r = _mm512_maskz_shrdi_epi64(0, a, b, 1); + let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shrdi_epi64(0b11111111, a, b, 1); + let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b); let e = _mm512_set1_epi64(1); assert_eq_m512i(r, e); } @@ -3231,7 +3302,7 @@ mod tests { unsafe fn test_mm256_shrdi_epi64() { let a = _mm256_set1_epi64x(8); let b = _mm256_set1_epi64x(2); - let r = _mm256_shrdi_epi64(a, b, 1); + let r = _mm256_shrdi_epi64::<1>(a, b); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); } @@ -3240,9 +3311,9 @@ mod tests { unsafe fn test_mm256_mask_shrdi_epi64() { let a = _mm256_set1_epi64x(8); let b = _mm256_set1_epi64x(2); - let r = _mm256_mask_shrdi_epi64(a, 0, a, b, 1); + let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shrdi_epi64(a, 0b00001111, a, b, 1); + let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); } @@ -3251,9 +3322,9 @@ mod tests { unsafe fn test_mm256_maskz_shrdi_epi64() { let a = _mm256_set1_epi64x(8); let b = _mm256_set1_epi64x(2); - let r = _mm256_maskz_shrdi_epi64(0, a, b, 1); + let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shrdi_epi64(0b00001111, a, b, 1); + let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); } @@ -3262,7 +3333,7 @@ mod tests { unsafe fn test_mm_shrdi_epi64() { let a = _mm_set1_epi64x(8); let b = _mm_set1_epi64x(2); - let r = _mm_shrdi_epi64(a, b, 1); + let r = _mm_shrdi_epi64::<1>(a, b); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); } @@ -3271,9 +3342,9 @@ mod tests { unsafe fn test_mm_mask_shrdi_epi64() { let a = _mm_set1_epi64x(8); let b = _mm_set1_epi64x(2); - let r = 
_mm_mask_shrdi_epi64(a, 0, a, b, 1); + let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shrdi_epi64(a, 0b00000011, a, b, 1); + let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); } @@ -3282,9 +3353,9 @@ mod tests { unsafe fn test_mm_maskz_shrdi_epi64() { let a = _mm_set1_epi64x(8); let b = _mm_set1_epi64x(2); - let r = _mm_maskz_shrdi_epi64(0, a, b, 1); + let r = _mm_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shrdi_epi64(0b00000011, a, b, 1); + let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); } @@ -3293,7 +3364,7 @@ mod tests { unsafe fn test_mm512_shrdi_epi32() { let a = _mm512_set1_epi32(8); let b = _mm512_set1_epi32(2); - let r = _mm512_shrdi_epi32(a, b, 1); + let r = _mm512_shrdi_epi32::<1>(a, b); let e = _mm512_set1_epi32(1); assert_eq_m512i(r, e); } @@ -3302,9 +3373,9 @@ mod tests { unsafe fn test_mm512_mask_shrdi_epi32() { let a = _mm512_set1_epi32(8); let b = _mm512_set1_epi32(2); - let r = _mm512_mask_shrdi_epi32(a, 0, a, b, 1); + let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shrdi_epi32(a, 0b11111111_11111111, a, b, 1); + let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b); let e = _mm512_set1_epi32(1); assert_eq_m512i(r, e); } @@ -3313,9 +3384,9 @@ mod tests { unsafe fn test_mm512_maskz_shrdi_epi32() { let a = _mm512_set1_epi32(8); let b = _mm512_set1_epi32(2); - let r = _mm512_maskz_shrdi_epi32(0, a, b, 1); + let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shrdi_epi32(0b11111111_11111111, a, b, 1); + let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b); let e = _mm512_set1_epi32(1); assert_eq_m512i(r, e); } @@ -3324,7 +3395,7 @@ mod tests { unsafe fn test_mm256_shrdi_epi32() { let a = _mm256_set1_epi32(8); let b = 
_mm256_set1_epi32(2); - let r = _mm256_shrdi_epi32(a, b, 1); + let r = _mm256_shrdi_epi32::<1>(a, b); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); } @@ -3333,9 +3404,9 @@ mod tests { unsafe fn test_mm256_mask_shrdi_epi32() { let a = _mm256_set1_epi32(8); let b = _mm256_set1_epi32(2); - let r = _mm256_mask_shrdi_epi32(a, 0, a, b, 1); + let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shrdi_epi32(a, 0b11111111, a, b, 1); + let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); } @@ -3344,9 +3415,9 @@ mod tests { unsafe fn test_mm256_maskz_shrdi_epi32() { let a = _mm256_set1_epi32(8); let b = _mm256_set1_epi32(2); - let r = _mm256_maskz_shrdi_epi32(0, a, b, 1); + let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shrdi_epi32(0b11111111, a, b, 1); + let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); } @@ -3355,7 +3426,7 @@ mod tests { unsafe fn test_mm_shrdi_epi32() { let a = _mm_set1_epi32(8); let b = _mm_set1_epi32(2); - let r = _mm_shrdi_epi32(a, b, 1); + let r = _mm_shrdi_epi32::<1>(a, b); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); } @@ -3364,9 +3435,9 @@ mod tests { unsafe fn test_mm_mask_shrdi_epi32() { let a = _mm_set1_epi32(8); let b = _mm_set1_epi32(2); - let r = _mm_mask_shrdi_epi32(a, 0, a, b, 1); + let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shrdi_epi32(a, 0b00001111, a, b, 1); + let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); } @@ -3375,9 +3446,9 @@ mod tests { unsafe fn test_mm_maskz_shrdi_epi32() { let a = _mm_set1_epi32(8); let b = _mm_set1_epi32(2); - let r = _mm_maskz_shrdi_epi32(0, a, b, 1); + let r = _mm_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = 
_mm_maskz_shrdi_epi32(0b00001111, a, b, 1); + let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); } @@ -3386,7 +3457,7 @@ mod tests { unsafe fn test_mm512_shrdi_epi16() { let a = _mm512_set1_epi16(8); let b = _mm512_set1_epi16(2); - let r = _mm512_shrdi_epi16(a, b, 1); + let r = _mm512_shrdi_epi16::<1>(a, b); let e = _mm512_set1_epi16(1); assert_eq_m512i(r, e); } @@ -3395,9 +3466,9 @@ mod tests { unsafe fn test_mm512_mask_shrdi_epi16() { let a = _mm512_set1_epi16(8); let b = _mm512_set1_epi16(2); - let r = _mm512_mask_shrdi_epi16(a, 0, a, b, 1); + let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shrdi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b, 1); + let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(1); assert_eq_m512i(r, e); } @@ -3406,9 +3477,9 @@ mod tests { unsafe fn test_mm512_maskz_shrdi_epi16() { let a = _mm512_set1_epi16(8); let b = _mm512_set1_epi16(2); - let r = _mm512_maskz_shrdi_epi16(0, a, b, 1); + let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shrdi_epi16(0b11111111_11111111_11111111_11111111, a, b, 1); + let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(1); assert_eq_m512i(r, e); } @@ -3417,7 +3488,7 @@ mod tests { unsafe fn test_mm256_shrdi_epi16() { let a = _mm256_set1_epi16(8); let b = _mm256_set1_epi16(2); - let r = _mm256_shrdi_epi16(a, b, 1); + let r = _mm256_shrdi_epi16::<1>(a, b); let e = _mm256_set1_epi16(1); assert_eq_m256i(r, e); } @@ -3426,9 +3497,9 @@ mod tests { unsafe fn test_mm256_mask_shrdi_epi16() { let a = _mm256_set1_epi16(8); let b = _mm256_set1_epi16(2); - let r = _mm256_mask_shrdi_epi16(a, 0, a, b, 1); + let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shrdi_epi16(a, 0b11111111_11111111, a, 
b, 1); + let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b); let e = _mm256_set1_epi16(1); assert_eq_m256i(r, e); } @@ -3437,9 +3508,9 @@ mod tests { unsafe fn test_mm256_maskz_shrdi_epi16() { let a = _mm256_set1_epi16(8); let b = _mm256_set1_epi16(2); - let r = _mm256_maskz_shrdi_epi16(0, a, b, 1); + let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shrdi_epi16(0b11111111_11111111, a, b, 1); + let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b); let e = _mm256_set1_epi16(1); assert_eq_m256i(r, e); } @@ -3448,7 +3519,7 @@ mod tests { unsafe fn test_mm_shrdi_epi16() { let a = _mm_set1_epi16(8); let b = _mm_set1_epi16(2); - let r = _mm_shrdi_epi16(a, b, 1); + let r = _mm_shrdi_epi16::<1>(a, b); let e = _mm_set1_epi16(1); assert_eq_m128i(r, e); } @@ -3457,9 +3528,9 @@ mod tests { unsafe fn test_mm_mask_shrdi_epi16() { let a = _mm_set1_epi16(8); let b = _mm_set1_epi16(2); - let r = _mm_mask_shrdi_epi16(a, 0, a, b, 1); + let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shrdi_epi16(a, 0b11111111, a, b, 1); + let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b); let e = _mm_set1_epi16(1); assert_eq_m128i(r, e); } @@ -3468,9 +3539,9 @@ mod tests { unsafe fn test_mm_maskz_shrdi_epi16() { let a = _mm_set1_epi16(8); let b = _mm_set1_epi16(2); - let r = _mm_maskz_shrdi_epi16(0, a, b, 1); + let r = _mm_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shrdi_epi16(0b11111111, a, b, 1); + let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b); let e = _mm_set1_epi16(1); assert_eq_m128i(r, e); } From a2b7f9d4b159e05b8fa305d9222d370523d89cd5 Mon Sep 17 00:00:00 2001 From: jironglin Date: Fri, 5 Mar 2021 00:33:09 +0000 Subject: [PATCH 017/123] ror_epi32,epi64, rol_epi32_epi64, srai_epi32 --- crates/core_arch/src/x86/avx512f.rs | 760 +++++++++++-------------- crates/core_arch/src/x86_64/avx512f.rs | 60 +- 2 
files changed, 360 insertions(+), 460 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 5abe23e093..5e5104b618 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -16624,16 +16624,12 @@ pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=4685) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_rol_epi32(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprold(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprold(a, IMM8); transmute(r) } @@ -16642,17 +16638,17 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=4683) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_rol_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vprold(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i32x16())) + let r = vprold(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -16660,18 +16656,14 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=4684) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprold(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprold(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
@@ -16679,16 +16671,12 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rol_epi32&expand=4682) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_rol_epi32(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_rol_epi32(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprold256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprold256(a, IMM8); transmute(r) } @@ -16697,17 +16685,17 @@ pub unsafe fn _mm256_rol_epi32(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rol_epi32&expand=4680) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_rol_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprold256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i32x8())) + let r = vprold256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16715,18 +16703,14 @@ pub unsafe fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rol_epi32&expand=4681) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprold256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprold256(a, IMM8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -16734,16 +16718,12 @@ pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rol_epi32&expand=4679) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_rol_epi32(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_rol_epi32(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprold128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprold128(a, IMM8); transmute(r) } @@ -16752,17 +16732,17 @@ pub unsafe fn _mm_rol_epi32(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rol_epi32&expand=4677) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_rol_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprold128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i32x4())) + let r = vprold128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16770,18 +16750,14 @@ pub unsafe fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rol_epi32&expand=4678) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprold128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprold128(a, IMM8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -16789,16 +16765,12 @@ pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi32&expand=4721) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_ror_epi32(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vprord(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprord(a, IMM8); transmute(r) } @@ -16807,17 +16779,17 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi32&expand=4719) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_ror_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprord(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i32x16())) + let r = vprord(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16825,18 +16797,14 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi32&expand=4720) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprord(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprord(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -16844,16 +16812,12 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ror_epi32&expand=4718) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_ror_epi32(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_ror_epi32(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprord256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprord256(a, IMM8); transmute(r) } @@ -16862,17 +16826,17 @@ pub unsafe fn _mm256_ror_epi32(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_ror_epi32&expand=4716) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_ror_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprord256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i32x8())) + let r = vprord256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16880,18 +16844,14 @@ pub unsafe fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_ror_epi32&expand=4717) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprord256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprord256(a, IMM8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -16899,16 +16859,12 @@ pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ror_epi32&expand=4715) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_ror_epi32(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_ror_epi32(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprord128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprord128(a, IMM8); transmute(r) } @@ -16917,17 +16873,17 @@ pub unsafe fn _mm_ror_epi32(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_ror_epi32&expand=4713) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_ror_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprord128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i32x4())) + let r = vprord128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16935,18 +16891,14 @@ pub unsafe fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_ror_epi32&expand=4714) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprord128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprord128(a, IMM8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -16954,16 +16906,12 @@ pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=4694) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_rol_epi64(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprolq(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprolq(a, IMM8); transmute(r) } @@ -16972,17 +16920,17 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=4692) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_rol_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vprolq(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i64x8())) + let r = vprolq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -16990,18 +16938,14 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=4693) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprolq(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprolq(a, IMM8); let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -17009,16 +16953,12 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rol_epi64&expand=4691) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_rol_epi64(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_rol_epi64(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprolq256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprolq256(a, IMM8); transmute(r) } @@ -17027,17 +16967,17 @@ pub unsafe fn _mm256_rol_epi64(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rol_epi64&expand=4689) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_rol_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprolq256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i64x4())) + let r = vprolq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17045,18 +16985,14 @@ pub unsafe fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rol_epi64&expand=4690) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprolq256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprolq256(a, IMM8); let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
@@ -17064,16 +17000,12 @@ pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rol_epi64&expand=4688) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_rol_epi64(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_rol_epi64(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprolq128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprolq128(a, IMM8); transmute(r) } @@ -17082,17 +17014,17 @@ pub unsafe fn _mm_rol_epi64(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rol_epi64&expand=4686) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_rol_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprolq128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i64x2())) + let r = vprolq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -17100,18 +17032,14 @@ pub unsafe fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rol_epi64&expand=4687) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprolq128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprolq128(a, IMM8); let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -17119,16 +17047,12 @@ pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=4730) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_ror_epi64(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprorq(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprorq(a, IMM8); transmute(r) } @@ -17137,17 +17061,17 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=4728) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_ror_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vprorq(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i64x8())) + let r = vprorq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17155,18 +17079,14 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=4729) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprorq(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprorq(a, IMM8); let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -17174,16 +17094,12 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ror_epi64&expand=4727) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_ror_epi64(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_ror_epi64(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprorq256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprorq256(a, IMM8); transmute(r) } @@ -17192,17 +17108,17 @@ pub unsafe fn _mm256_ror_epi64(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_ror_epi64&expand=4725) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_ror_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprorq256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i64x4())) + let r = vprorq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17210,18 +17126,14 @@ pub unsafe fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_ror_epi64&expand=4726) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprorq256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprorq256(a, IMM8); let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. 
@@ -17229,16 +17141,12 @@ pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ror_epi64&expand=4724) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_ror_epi64(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_ror_epi64(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprorq128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprorq128(a, IMM8); transmute(r) } @@ -17247,17 +17155,17 @@ pub unsafe fn _mm_ror_epi64(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_ror_epi64&expand=4722) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_ror_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprorq128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i64x2())) + let r = vprorq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -17265,18 +17173,14 @@ pub unsafe fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_ror_epi64&expand=4723) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprorq128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprorq128(a, IMM8); let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
@@ -18343,7 +18247,7 @@ pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __ pub unsafe fn _mm512_srai_epi32(a: __m512i) -> __m512i { static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - let r = vpsraid(a, IMM8); + let r = vpsraid512(a, IMM8); transmute(r) } @@ -18361,7 +18265,7 @@ pub unsafe fn _mm512_mask_srai_epi32( ) -> __m512i { static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - let r = vpsraid(a, IMM8); + let r = vpsraid512(a, IMM8); transmute(simd_select_bitmask(k, r, src.as_i32x16())) } @@ -18375,7 +18279,7 @@ pub unsafe fn _mm512_mask_srai_epi32( pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - let r = vpsraid(a, IMM8); + let r = vpsraid512(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r, zero)) } @@ -18385,16 +18289,16 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srai_epi32&expand=5431) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_srai_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x8(), src.as_i32x8())) +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_srai_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + let imm8 = IMM8 as i32; + let r = psraid256(a.as_i32x8(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18402,17 +18306,13 @@ pub unsafe fn _mm256_mask_srai_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srai_epi32&expand=5432) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i) -> __m256i { + let imm8 = IMM8 as i32; + let r = psraid256(a.as_i32x8(), imm8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -18420,16 +18320,16 @@ pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srai_epi32&expand=5428) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x4(), src.as_i32x4())) +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_srai_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + let imm8 = IMM8 as i32; + let r = psraid128(a.as_i32x4(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18437,17 +18337,13 @@ pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srai_epi32&expand=5429) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i) -> __m128i { + let imm8 = IMM8 as i32; + let r = psraid128(a.as_i32x4(), imm8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -37615,7 +37511,11 @@ extern "C" { fn vpsraq128(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.avx512.psrai.d.512"] - fn vpsraid(a: i32x16, imm8: u32) -> i32x16; + fn vpsraid512(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx2.psrai.d"] + fn psraid256(a: i32x8, imm8: i32) -> i32x8; + #[link_name = "llvm.x86.sse2.psrai.d"] + fn psraid128(a: i32x4, imm8: i32) -> i32x4; #[link_name = "llvm.x86.avx512.psrai.q.512"] fn vpsraiq(a: i64x8, imm8: u32) -> i64x8; @@ -45648,7 +45548,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rol_epi32() { let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r = _mm512_rol_epi32(a, 1); + let r = _mm512_rol_epi32::<1>(a); let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } @@ -45656,9 +45556,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rol_epi32() { let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r = _mm512_mask_rol_epi32(a, 0, a, 1); + let r = _mm512_mask_rol_epi32::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1); + let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a); let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } @@ -45666,9 +45566,9 @@ 
mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rol_epi32() { let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); - let r = _mm512_maskz_rol_epi32(0, a, 1); + let r = _mm512_maskz_rol_epi32::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1); + let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a); let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); assert_eq_m512i(r, e); } @@ -45676,7 +45576,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_rol_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_rol_epi32(a, 1); + let r = _mm256_rol_epi32::<1>(a); let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -45684,9 +45584,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_rol_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_mask_rol_epi32(a, 0, a, 1); + let r = _mm256_mask_rol_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_rol_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -45694,9 +45594,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_rol_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_maskz_rol_epi32(0, a, 1); + let r = _mm256_maskz_rol_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_rol_epi32(0b11111111, a, 1); + let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -45704,7 +45604,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_rol_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_rol_epi32(a, 1); 
+ let r = _mm_rol_epi32::<1>(a); let e = _mm_set_epi32(1 << 0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -45712,9 +45612,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_rol_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_mask_rol_epi32(a, 0, a, 1); + let r = _mm_mask_rol_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_rol_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a); let e = _mm_set_epi32(1 << 0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -45722,9 +45622,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_rol_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_maskz_rol_epi32(0, a, 1); + let r = _mm_maskz_rol_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_rol_epi32(0b00001111, a, 1); + let r = _mm_maskz_rol_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(1 << 0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -45732,7 +45632,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_ror_epi32() { let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let r = _mm512_ror_epi32(a, 1); + let r = _mm512_ror_epi32::<1>(a); let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -45740,9 +45640,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_ror_epi32() { let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let r = _mm512_mask_ror_epi32(a, 0, a, 1); + let r = _mm512_mask_ror_epi32::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1); + let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a); let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -45750,9 +45650,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_ror_epi32() { let a = 
_mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); - let r = _mm512_maskz_ror_epi32(0, a, 1); + let r = _mm512_maskz_ror_epi32::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1); + let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a); let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); assert_eq_m512i(r, e); } @@ -45760,7 +45660,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_ror_epi32() { let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); - let r = _mm256_ror_epi32(a, 1); + let r = _mm256_ror_epi32::<1>(a); let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); assert_eq_m256i(r, e); } @@ -45768,9 +45668,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_ror_epi32() { let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); - let r = _mm256_mask_ror_epi32(a, 0, a, 1); + let r = _mm256_mask_ror_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_ror_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); assert_eq_m256i(r, e); } @@ -45778,9 +45678,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_ror_epi32() { let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); - let r = _mm256_maskz_ror_epi32(0, a, 1); + let r = _mm256_maskz_ror_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_ror_epi32(0b11111111, a, 1); + let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); assert_eq_m256i(r, e); } @@ -45788,7 +45688,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_ror_epi32() { let a = _mm_set_epi32(1 << 0, 2, 2, 2); - let r = _mm_ror_epi32(a, 1); + let r = _mm_ror_epi32::<1>(a); let e = _mm_set_epi32(1 << 31, 1, 1, 1); assert_eq_m128i(r, 
e); } @@ -45796,9 +45696,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_ror_epi32() { let a = _mm_set_epi32(1 << 0, 2, 2, 2); - let r = _mm_mask_ror_epi32(a, 0, a, 1); + let r = _mm_mask_ror_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_ror_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a); let e = _mm_set_epi32(1 << 31, 1, 1, 1); assert_eq_m128i(r, e); } @@ -45806,9 +45706,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_ror_epi32() { let a = _mm_set_epi32(1 << 0, 2, 2, 2); - let r = _mm_maskz_ror_epi32(0, a, 1); + let r = _mm_maskz_ror_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_ror_epi32(0b00001111, a, 1); + let r = _mm_maskz_ror_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(1 << 31, 1, 1, 1); assert_eq_m128i(r, e); } @@ -46664,9 +46564,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_srai_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); - let r = _mm256_mask_srai_epi32(a, 0, a, 1); + let r = _mm256_mask_srai_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_srai_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -46674,9 +46574,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_srai_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); - let r = _mm256_maskz_srai_epi32(0, a, 1); + let r = _mm256_maskz_srai_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_srai_epi32(0b11111111, a, 1); + let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -46684,9 +46584,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_srai_epi32() { let a = 
_mm_set_epi32(1 << 5, 0, 0, 0); - let r = _mm_mask_srai_epi32(a, 0, a, 1); + let r = _mm_mask_srai_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_srai_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a); let e = _mm_set_epi32(1 << 4, 0, 0, 0); assert_eq_m128i(r, e); } @@ -46694,9 +46594,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_srai_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); - let r = _mm_maskz_srai_epi32(0, a, 1); + let r = _mm_maskz_srai_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_srai_epi32(0b00001111, a, 1); + let r = _mm_maskz_srai_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(1 << 4, 0, 0, 0); assert_eq_m128i(r, e); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index af62b2112c..84eab28e34 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -7627,7 +7627,7 @@ mod tests { 1 << 63, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_rol_epi64(a, 1); + let r = _mm512_rol_epi64::<1>(a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 0, 1 << 33, 1 << 33, 1 << 33, @@ -7643,9 +7643,9 @@ mod tests { 1 << 63, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_mask_rol_epi64(a, 0, a, 1); + let r = _mm512_mask_rol_epi64::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1); + let r = _mm512_mask_rol_epi64::<1>(a, 0b11111111, a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 0, 1 << 33, 1 << 33, 1 << 33, @@ -7661,9 +7661,9 @@ mod tests { 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 63, ); - let r = _mm512_maskz_rol_epi64(0, a, 1); + let r = _mm512_maskz_rol_epi64::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_rol_epi64(0b00001111, a, 1); + let r = _mm512_maskz_rol_epi64::<1>(0b00001111, a); 
let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0); assert_eq_m512i(r, e); } @@ -7671,7 +7671,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_rol_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_rol_epi64(a, 1); + let r = _mm256_rol_epi64::<1>(a); let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7679,9 +7679,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_rol_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_mask_rol_epi64(a, 0, a, 1); + let r = _mm256_mask_rol_epi64::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_rol_epi64(a, 0b00001111, a, 1); + let r = _mm256_mask_rol_epi64::<1>(a, 0b00001111, a); let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7689,9 +7689,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_rol_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_maskz_rol_epi64(0, a, 1); + let r = _mm256_maskz_rol_epi64::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_rol_epi64(0b00001111, a, 1); + let r = _mm256_maskz_rol_epi64::<1>(0b00001111, a); let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7699,7 +7699,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_rol_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_rol_epi64(a, 1); + let r = _mm_rol_epi64::<1>(a); let e = _mm_set_epi64x(1 << 0, 1 << 33); assert_eq_m128i(r, e); } @@ -7707,9 +7707,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_rol_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_mask_rol_epi64(a, 0, a, 1); + let r = _mm_mask_rol_epi64::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = 
_mm_mask_rol_epi64(a, 0b00000011, a, 1); + let r = _mm_mask_rol_epi64::<1>(a, 0b00000011, a); let e = _mm_set_epi64x(1 << 0, 1 << 33); assert_eq_m128i(r, e); } @@ -7717,9 +7717,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_rol_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_maskz_rol_epi64(0, a, 1); + let r = _mm_maskz_rol_epi64::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_rol_epi64(0b00000011, a, 1); + let r = _mm_maskz_rol_epi64::<1>(0b00000011, a); let e = _mm_set_epi64x(1 << 0, 1 << 33); assert_eq_m128i(r, e); } @@ -7731,7 +7731,7 @@ mod tests { 1 << 0, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_ror_epi64(a, 1); + let r = _mm512_ror_epi64::<1>(a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 63, 1 << 31, 1 << 31, 1 << 31, @@ -7747,9 +7747,9 @@ mod tests { 1 << 0, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_mask_ror_epi64(a, 0, a, 1); + let r = _mm512_mask_ror_epi64::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1); + let r = _mm512_mask_ror_epi64::<1>(a, 0b11111111, a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 63, 1 << 31, 1 << 31, 1 << 31, @@ -7765,9 +7765,9 @@ mod tests { 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 0, ); - let r = _mm512_maskz_ror_epi64(0, a, 1); + let r = _mm512_maskz_ror_epi64::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_ror_epi64(0b00001111, a, 1); + let r = _mm512_maskz_ror_epi64::<1>(0b00001111, a); let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63); assert_eq_m512i(r, e); } @@ -7775,7 +7775,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_ror_epi64() { let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_ror_epi64(a, 1); + let r = _mm256_ror_epi64::<1>(a); let e = _mm256_set_epi64x(1 << 63, 1 
<< 31, 1 << 31, 1 << 31); assert_eq_m256i(r, e); } @@ -7783,9 +7783,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_ror_epi64() { let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_mask_ror_epi64(a, 0, a, 1); + let r = _mm256_mask_ror_epi64::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_ror_epi64(a, 0b00001111, a, 1); + let r = _mm256_mask_ror_epi64::<1>(a, 0b00001111, a); let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); assert_eq_m256i(r, e); } @@ -7793,9 +7793,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_ror_epi64() { let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_maskz_ror_epi64(0, a, 1); + let r = _mm256_maskz_ror_epi64::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_ror_epi64(0b00001111, a, 1); + let r = _mm256_maskz_ror_epi64::<1>(0b00001111, a); let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); assert_eq_m256i(r, e); } @@ -7803,7 +7803,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_ror_epi64() { let a = _mm_set_epi64x(1 << 0, 1 << 32); - let r = _mm_ror_epi64(a, 1); + let r = _mm_ror_epi64::<1>(a); let e = _mm_set_epi64x(1 << 63, 1 << 31); assert_eq_m128i(r, e); } @@ -7811,9 +7811,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_ror_epi64() { let a = _mm_set_epi64x(1 << 0, 1 << 32); - let r = _mm_mask_ror_epi64(a, 0, a, 1); + let r = _mm_mask_ror_epi64::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_ror_epi64(a, 0b00000011, a, 1); + let r = _mm_mask_ror_epi64::<1>(a, 0b00000011, a); let e = _mm_set_epi64x(1 << 63, 1 << 31); assert_eq_m128i(r, e); } @@ -7821,9 +7821,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_ror_epi64() { let a = _mm_set_epi64x(1 << 0, 1 << 32); - let r = _mm_maskz_ror_epi64(0, a, 1); + let r = _mm_maskz_ror_epi64::<1>(0, a); 
assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_ror_epi64(0b00000011, a, 1); + let r = _mm_maskz_ror_epi64::<1>(0b00000011, a); let e = _mm_set_epi64x(1 << 63, 1 << 31); assert_eq_m128i(r, e); } From f4b85d7bd66f90a596539502814bd72c54479ff0 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 01:28:36 +0000 Subject: [PATCH 018/123] cvt_roundps,pd_epi32,epu32; cvt_roundepi32,epu32_ps; cvt_roundpd_ps; mm_add,sub,mul,div_round_ss,sd; mm_sqrt_round_ss,sd; mm_scalf_round_ss,sd; mm_fmadd,fmsub,fnmadd,fnmsub_round_ss,sd; mm_cvt_roundss_i32,u32; mm_cvt_roundsd_i32,u32; mm_cvt_roundi32,u32_ss; mm_cvt_roundsd_ss --- crates/core_arch/src/x86/avx512f.rs | 2186 ++++++++++-------------- crates/core_arch/src/x86_64/avx512f.rs | 30 +- 2 files changed, 878 insertions(+), 1338 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7bf8bdeae9..7911157eb2 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -13393,17 +13393,13 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_epi32&expand=1335) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2dq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2dq(a, zero, 0b11111111_11111111, ROUNDING); transmute(r) } @@ -13419,22 +13415,17 @@ pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epi32&expand=1336) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_epi32( +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_epi32( src: __m512i, k: __mmask16, a: __m512, - rounding: i32, ) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let src = src.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2dq(a, src, k, ROUNDING); transmute(r) } @@ -13450,17 +13441,16 @@ pub unsafe fn _mm512_mask_cvt_roundps_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epi32&expand=1337) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_epi32( + k: __mmask16, + a: __m512, +) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2dq(a, zero, k, ROUNDING); transmute(r) } @@ -13476,17 +13466,13 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_epu32&expand=1341) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2udq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2udq(a, zero, 0b11111111_11111111, ROUNDING); transmute(r) } @@ -13502,22 +13488,17 @@ pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epu32&expand=1342) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_epu32( +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_epu32( src: __m512i, k: __mmask16, a: __m512, - rounding: i32, ) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let src = src.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2udq(a, src, k, ROUNDING); transmute(r) } @@ -13533,17 +13514,16 @@ pub unsafe fn _mm512_mask_cvt_roundps_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epu32&expand=1343) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512, rounding: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_epu32( + k: __mmask16, + a: __m512, +) -> __m512i { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtps2udq(a, zero, k, ROUNDING); transmute(r) } @@ -13624,17 +13604,13 @@ pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_epi32&expand=1315) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2dq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2dq(a, zero, 0b11111111, ROUNDING); transmute(r) } @@ -13650,22 +13626,17 @@ pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_epi32&expand=1316) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_epi32( +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundpd_epi32( src: __m256i, k: __mmask8, a: __m512d, - rounding: i32, ) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let src = src.as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2dq(a, src, k, ROUNDING); transmute(r) } @@ -13681,17 +13652,16 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundpd_epi32( + k: __mmask8, + a: __m512d, +) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2dq(a, zero, k, ROUNDING); transmute(r) } @@ -13707,17 +13677,13 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_epu32&expand=1321) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_u32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2udq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2udq(a, zero, 0b11111111, ROUNDING); transmute(r) } @@ -13733,22 +13699,17 @@ pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_epu32&expand=1322) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_epu32( +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundpd_epu32( src: __m256i, k: __mmask8, a: __m512d, - rounding: i32, ) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let src = src.as_u32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2udq(a, src, k, ROUNDING); transmute(r) } @@ -13764,17 +13725,16 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundpd_epu32( + k: __mmask8, + a: __m512d, +) -> __m256i { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_u32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2udq(a, zero, k, ROUNDING); transmute(r) } @@ -13790,17 +13750,13 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ps&expand=1327) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d) -> __m256 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_ps().as_f32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2ps(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2ps(a, zero, 0b11111111, ROUNDING); transmute(r) } @@ -13816,22 +13772,17 @@ pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ps&expand=1328) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_ps( +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundpd_ps( src: __m256, k: __mmask8, a: __m512d, - rounding: i32, ) -> __m256 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let src = src.as_f32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtpd2ps(a, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2ps(a, src, k, ROUNDING); transmute(r) } @@ -13847,17 +13798,13 @@ pub unsafe fn _mm512_mask_cvt_roundpd_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ps&expand=1329) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d) -> __m256 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let zero = _mm256_setzero_ps().as_f32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtpd2ps(a, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtpd2ps(a, zero, k, ROUNDING); transmute(r) } @@ -13873,16 +13820,12 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ps&expand=1294) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtdq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtdq2ps(a, ROUNDING); transmute(r) } @@ -13898,21 +13841,16 @@ pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ps&expand=1295) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundepi32_ps( +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundepi32_ps( src: __m512, k: __mmask16, a: __m512i, - rounding: i32, ) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtdq2ps(a, $imm4) - }; - } - let r: f32x16 = constify_imm4_round!(rounding, call); + let r = vcvtdq2ps(a, ROUNDING); transmute(simd_select_bitmask(k, r, src.as_f32x16())) } @@ -13928,16 +13866,15 @@ pub unsafe fn _mm512_mask_cvt_roundepi32_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundepi32_ps( + k: __mmask16, + a: __m512i, +) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtdq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtdq2ps(a, ROUNDING); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r, zero)) } @@ -13954,16 +13891,12 @@ pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ps&expand=1303) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtudq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtudq2ps(a, ROUNDING); transmute(r) } @@ -13979,21 +13912,16 @@ pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ps&expand=1304) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundepu32_ps( +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundepu32_ps( src: __m512, k: __mmask16, a: __m512i, - rounding: i32, ) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtudq2ps(a, $imm4) - }; - } - let r: f32x16 = constify_imm4_round!(rounding, call); + let r = vcvtudq2ps(a, ROUNDING); transmute(simd_select_bitmask(k, r, src.as_f32x16())) } @@ -14009,16 +13937,15 @@ pub unsafe fn _mm512_mask_cvt_roundepu32_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundepu32_ps( + k: __mmask16, + a: __m512i, +) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtudq2ps(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtudq2ps(a, ROUNDING); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r, zero)) } @@ -33519,18 +33446,15 @@ pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_ss&expand=151) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vaddss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33545,24 +33469,20 @@ pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_ss&expand=152) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_add_round_ss( +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_add_round_ss( src: __m128, k: __mmask8, a: 
__m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vaddss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddss(a, b, src, k, ROUNDING); + transmute(r) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33577,18 +33497,19 @@ pub unsafe fn _mm_mask_add_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_ss&expand=153) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_add_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vaddss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddss(a, b, zero, k, ROUNDING); + transmute(r) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -33603,18 +33524,15 @@ pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sd&expand=148) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vaddsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33629,24 +33547,20 @@ pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_Sd&expand=149) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_add_round_sd( +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_add_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vaddsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddsd(a, b, src, k, ROUNDING); + transmute(r) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33661,23 +33575,19 @@ pub unsafe fn _mm_mask_add_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sd&expand=150) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_add_round_sd( +#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_add_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vaddsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vaddsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33692,18 +33602,15 @@ pub unsafe fn _mm_maskz_add_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_ss&expand=5745) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsubss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33718,24 +33625,20 @@ pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_ss&expand=5743) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sub_round_ss( +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sub_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsubss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubss(a, b, src, k, ROUNDING); + transmute(r) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33750,18 +33653,19 @@ pub unsafe fn _mm_mask_sub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_ss&expand=5744) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sub_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsubss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubss(a, b, zero, k, ROUNDING); + transmute(r) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -33776,18 +33680,15 @@ pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_sd&expand=5742) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsubsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33802,24 +33703,20 @@ pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_sd&expand=5740) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sub_round_sd( +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sub_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsubsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubsd(a, b, src, k, ROUNDING); + transmute(r) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33834,23 +33731,19 @@ pub unsafe fn _mm_mask_sub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_sd&expand=5741) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sub_round_sd( +#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sub_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsubsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsubsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33865,18 +33758,15 @@ pub unsafe fn _mm_maskz_sub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_ss&expand=3946) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmulss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33891,24 +33781,20 @@ pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_ss&expand=3944) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_mul_round_ss( +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_mul_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmulss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulss(a, b, src, k, ROUNDING); + transmute(r) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -33923,18 +33809,19 @@ pub unsafe fn _mm_mask_mul_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_ss&expand=3945) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_mul_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmulss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulss(a, b, zero, k, ROUNDING); + transmute(r) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -33949,18 +33836,15 @@ pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_sd&expand=3943) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmulsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -33975,24 +33859,20 @@ pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_sd&expand=3941) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_mul_round_sd( +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_mul_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmulsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulsd(a, b, src, k, ROUNDING); + transmute(r) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34007,23 +33887,19 @@ pub unsafe fn _mm_mask_mul_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_sd&expand=3942) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_mul_round_sd( +#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_mul_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmulsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vmulsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34038,18 +33914,15 @@ pub unsafe fn _mm_maskz_mul_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_ss&expand=2174) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vdivss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34064,24 +33937,20 @@ pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_ss&expand=2175) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_div_round_ss( +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_div_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vdivss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivss(a, b, src, k, ROUNDING); + transmute(r) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34096,18 +33965,19 @@ pub unsafe fn _mm_mask_div_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_ss&expand=2176) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_div_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vdivss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivss(a, b, zero, k, ROUNDING); + transmute(r) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -34122,18 +33992,15 @@ pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_sd&expand=2171) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vdivsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34148,24 +34015,20 @@ pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_sd&expand=2172) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_div_round_sd( +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_div_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vdivsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivsd(a, b, src, k, ROUNDING); + transmute(r) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34180,23 +34043,19 @@ pub unsafe fn _mm_mask_div_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_sd&expand=2173) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_div_round_sd( +#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_div_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vdivsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vdivsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34475,18 +34334,15 @@ pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_ss&expand=5383) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtss(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34501,24 +34357,20 @@ pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_ss&expand=5381) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sqrt_round_ss( +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sqrt_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtss(a, b, src, k, ROUNDING); + transmute(r) } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34533,18 +34385,19 @@ pub unsafe fn _mm_mask_sqrt_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_ss&expand=5382) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sqrt_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtss(a, b, zero, k, ROUNDING); + transmute(r) } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -34559,18 +34412,15 @@ pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, roundin /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_sd&expand=5380) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtsd(a, b, zero, 0b1, ROUNDING); + transmute(r) } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34585,24 +34435,20 @@ pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_sd&expand=5378) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_sqrt_round_sd( +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_sqrt_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vsqrtsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtsd(a, b, src, k, ROUNDING); + transmute(r) } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34617,23 +34463,19 @@ pub unsafe fn _mm_mask_sqrt_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_sd&expand=5379) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_sqrt_round_sd( +#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_sqrt_round_sd( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vsqrtsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_round!(rounding, call)) + let r = vsqrtsd(a, b, zero, k, ROUNDING); + transmute(r) } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -35196,18 +35038,14 @@ pub unsafe fn _mm_maskz_roundscale_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_ss&expand=4895) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vscalefss(a, b, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefss(a, b, zero, 0b11111111, ROUNDING); transmute(r) } @@ -35223,24 +35061,19 @@ pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_ss&expand=4893) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_scalef_round_ss( +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_scalef_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vscalefss(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefss(a, b, src, k, ROUNDING); transmute(r) } @@ -35256,23 +35089,18 @@ pub unsafe fn _mm_mask_scalef_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_ss&expand=4894) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_scalef_round_ss( +#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_scalef_round_ss( k: __mmask8, a: __m128, b: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vscalefss(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefss(a, b, zero, k, ROUNDING); transmute(r) } @@ -35288,18 +35116,14 @@ pub unsafe fn _mm_maskz_scalef_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_sd&expand=4892) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vscalefsd(a, b, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefsd(a, b, zero, 0b11111111, ROUNDING); transmute(r) } @@ -35315,24 +35139,19 @@ pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m1 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_sd&expand=4890) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_scalef_round_sd( +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_scalef_round_sd<const ROUNDING: i32>( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vscalefsd(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefsd(a, b, src, k, ROUNDING); transmute(r) } @@ -35348,23 +35166,18 @@ pub unsafe fn _mm_mask_scalef_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_sd&expand=4891) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_scalef_round_sd( +#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>( k: __mmask8, a: __m128d, b: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vscalefsd(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vscalefsd(a, b, zero, k, ROUNDING); transmute(r) } @@ -35380,19 +35193,15 @@ pub unsafe fn _mm_maskz_scalef_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_ss&expand=2573) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fmadd = constify_imm4_round!(rounding, call); - let r = simd_insert(a, 0, fmadd); + let r = vfmadd132ss(extracta, extractb, extractc, ROUNDING); + let r = simd_insert(a, 0, r); transmute(r) } @@ -35408,25 +35217,20 @@ pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_ss&expand=2574) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmadd_round_ss( +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmadd_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmadd: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f32 = 
simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! call { - ($imm4:expr) => { - vfmadd132ss(fmadd, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132ss(fmadd, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35444,26 +35248,21 @@ pub unsafe fn _mm_mask_fmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_ss&expand=2576) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmadd_round_ss( +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmadd_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmadd: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35481,25 +35280,20 @@ pub unsafe fn _mm_maskz_fmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_ss&expand=2575) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmadd_round_ss( +#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmadd_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmadd: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, fmadd, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132ss(extracta, extractb, fmadd, ROUNDING); } let r = simd_insert(c, 0, fmadd); transmute(r) @@ -35517,18 +35311,18 @@ pub unsafe fn _mm_mask3_fmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_sd&expand=2569) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmadd_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fmadd = constify_imm4_round!(rounding, call); + let fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fmadd); transmute(r) } @@ -35545,25 +35339,20 @@ pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_sd&expand=2570) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmadd_round_sd( +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmadd_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmadd: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(fmadd, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132sd(fmadd, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35581,26 +35370,21 @@ pub unsafe fn _mm_mask_fmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_sd&expand=2572) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmadd_round_sd( +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmadd_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmadd: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmadd); transmute(r) @@ -35618,25 +35402,20 @@ pub unsafe fn _mm_maskz_fmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_Sd&expand=2571) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmadd_round_sd( +#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmadd_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmadd: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, fmadd, $imm4) - }; - } - fmadd = constify_imm4_round!(rounding, call); + fmadd = vfmadd132sd(extracta, extractb, fmadd, ROUNDING); } let r = simd_insert(c, 0, fmadd); transmute(r) @@ -35654,19 +35433,15 @@ pub unsafe fn _mm_mask3_fmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_ss&expand=2659) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fmsub = constify_imm4_round!(rounding, call); + let fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fmsub); transmute(r) } @@ -35683,26 +35458,21 @@ pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_ss&expand=2660) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmsub_round_ss( +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmsub_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmsub: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(fmsub, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132ss(fmsub, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35720,27 +35490,22 @@ pub unsafe fn _mm_mask_fmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_ss&expand=2662) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmsub_round_ss( +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmsub_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmsub: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35758,26 +35523,21 @@ pub unsafe fn _mm_maskz_fmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_ss&expand=2661) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmsub_round_ss( +#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmsub_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fmsub: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extractb: f32 = simd_extract(b, 0); let extractc = -fmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fmsub); transmute(r) @@ -35795,19 +35555,19 @@ pub unsafe fn _mm_mask3_fmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_sd&expand=2655) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fmsub_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fmsub = constify_imm4_round!(rounding, call); + let fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fmsub); transmute(r) } @@ -35824,26 +35584,21 @@ pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_sd&expand=2656) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fmsub_round_sd( +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fmsub_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmsub: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(fmsub, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132sd(fmsub, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35861,27 +35616,22 @@ pub unsafe fn _mm_mask_fmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_sd&expand=2658) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fmsub_round_sd( +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fmsub_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmsub: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fmsub); transmute(r) @@ -35899,26 +35649,21 @@ pub unsafe fn _mm_maskz_fmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_sd&expand=2657) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fmsub_round_sd( +#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fmsub_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fmsub: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extractb: f64 = simd_extract(b, 0); let extractc = -fmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fmsub = constify_imm4_round!(rounding, call); + fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fmsub); transmute(r) @@ -35936,19 +35681,15 @@ pub unsafe fn _mm_mask3_fmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_ss&expand=2739) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fnmadd = constify_imm4_round!(rounding, call); + let fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmadd); transmute(r) } @@ -35965,26 +35706,21 @@ pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_ss&expand=2740) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmadd_round_ss( +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmadd_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmadd: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36002,27 +35738,22 @@ pub unsafe fn _mm_mask_fnmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_ss&expand=2742) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmadd_round_ss( +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmadd_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmadd: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36040,26 +35771,21 @@ pub unsafe fn _mm_maskz_fnmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_ss&expand=2741) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmadd_round_ss( +#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmadd_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmadd: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, fnmadd, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132ss(extracta, extractb, fnmadd, ROUNDING); } let r = simd_insert(c, 0, fnmadd); transmute(r) @@ -36077,19 +35803,19 @@ pub unsafe fn _mm_mask3_fnmadd_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_sd&expand=2735) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmadd_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fnmadd = constify_imm4_round!(rounding, call); + let fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmadd); transmute(r) } @@ -36106,26 +35832,21 @@ pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_sd&expand=2736) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmadd_round_sd( +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmadd_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmadd: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmadd; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36143,27 +35864,22 @@ pub unsafe fn _mm_mask_fnmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_sd&expand=2738) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmadd_round_sd( +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmadd_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmadd: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmadd); transmute(r) @@ -36181,26 +35897,21 @@ pub unsafe fn _mm_maskz_fnmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_Sd&expand=2737) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmadd_round_sd( +#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmadd_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmadd: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, fnmadd, $imm4) - }; - } - fnmadd = constify_imm4_round!(rounding, call); + fnmadd = vfmadd132sd(extracta, extractb, fnmadd, ROUNDING); } let r = simd_insert(c, 0, fnmadd); transmute(r) @@ -36218,20 +35929,16 @@ pub unsafe fn _mm_mask3_fnmadd_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_ss&expand=2787) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + static_assert_rounding!(ROUNDING); let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - let fnmsub = constify_imm4_round!(rounding, call); + let fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmsub); transmute(r) } @@ -36248,27 +35955,22 @@ pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_ss&expand=2788) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmsub_round_ss( +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmsub_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmsub: f32 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36286,15 +35988,15 @@ pub unsafe fn _mm_mask_fnmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_ss&expand=2790) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmsub_round_ss( +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmsub_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmsub: f32 = 0.; if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); @@ -36302,12 +36004,7 @@ pub unsafe fn _mm_maskz_fnmsub_round_ss( let extractb: f32 = simd_extract(b, 0); let extractc: f32 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36325,27 +36022,22 @@ pub unsafe fn _mm_maskz_fnmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_ss&expand=2789) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmsub_round_ss( +#[cfg_attr(test, assert_instr(vfnmsub213ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmsub_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let mut fnmsub: f32 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f32 = simd_extract(a, 0); let extracta = -extracta; let extractb: f32 = simd_extract(b, 0); let extractc = -fnmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132ss(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fnmsub); transmute(r) @@ -36363,20 +36055,20 @@ pub unsafe fn _mm_mask3_fnmsub_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_sd&expand=2783) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fnmsub_round_sd( + a: __m128d, + b: __m128d, + c: __m128d, +) -> __m128d { + static_assert_rounding!(ROUNDING); let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - let fnmsub = constify_imm4_round!(rounding, call); + let fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); let r = simd_insert(a, 0, fnmsub); transmute(r) } @@ -36393,27 +36085,22 @@ pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_sd&expand=2784) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fnmsub_round_sd( +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fnmsub_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmsub: f64 = simd_extract(a, 0); if (k & 0b00000001) != 0 { let extracta = -fnmsub; let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36431,15 +36118,15 @@ pub unsafe fn _mm_mask_fnmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_sd&expand=2786) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fnmsub_round_sd( +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fnmsub_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmsub: f64 = 0.; if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); @@ -36447,12 +36134,7 @@ pub unsafe fn _mm_maskz_fnmsub_round_sd( let extractb: f64 = simd_extract(b, 0); let extractc: f64 = simd_extract(c, 0); let extractc = -extractc; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(a, 0, fnmsub); transmute(r) @@ -36470,27 +36152,22 @@ pub unsafe fn _mm_maskz_fnmsub_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_sd&expand=2785) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask3_fnmsub_round_sd( +#[cfg_attr(test, assert_instr(vfnmsub213sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask3_fnmsub_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, - rounding: i32, ) -> __m128d { + static_assert_rounding!(ROUNDING); let mut fnmsub: f64 = simd_extract(c, 0); if (k & 0b00000001) != 0 { let extracta: f64 = simd_extract(a, 0); let extracta = -extracta; let extractb: f64 = simd_extract(b, 0); let extractc = -fnmsub; - macro_rules! 
call { - ($imm4:expr) => { - vfmadd132sd(extracta, extractb, extractc, $imm4) - }; - } - fnmsub = constify_imm4_round!(rounding, call); + fnmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING); } let r = simd_insert(c, 0, fnmsub); transmute(r) @@ -36977,18 +36654,14 @@ pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_ss&expand=1361) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2ss(a, b, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2ss(a, b, zero, 0b11111111, ROUNDING); transmute(r) } @@ -37003,24 +36676,19 @@ pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundsd_ss&expand=1362) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_cvt_roundsd_ss( +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_cvt_roundsd_ss( src: __m128, k: __mmask8, a: __m128, b: __m128d, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2ss(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2ss(a, b, src, k, ROUNDING); transmute(r) } @@ -37035,23 +36703,18 @@ pub unsafe fn _mm_mask_cvt_roundsd_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundsd_ss&expand=1363) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_cvt_roundsd_ss( +#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_cvt_roundsd_ss( k: __mmask8, a: __m128, b: __m128d, - rounding: i32, ) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2ss(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2ss(a, b, zero, k, ROUNDING); transmute(r) } @@ -37066,16 +36729,12 @@ pub unsafe fn _mm_maskz_cvt_roundsd_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_si32&expand=1374) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_si32(a: __m128) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si(a, ROUNDING); transmute(r) } @@ -37090,16 +36749,12 @@ pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_i32&expand=1369) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_i32(a: __m128) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si(a, ROUNDING); transmute(r) } @@ -37114,16 +36769,12 @@ pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_u32&expand=1376) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_u32(a: __m128, rounding: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_u32(a: __m128) -> u32 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2usi(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2usi(a, ROUNDING); transmute(r) } @@ -37158,16 +36809,12 @@ pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_si32&expand=1359) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si(a, ROUNDING); transmute(r) } @@ -37182,16 +36829,12 @@ pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_i32&expand=1357) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d) -> i32 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si(a, ROUNDING); transmute(r) } @@ -37206,16 +36849,12 @@ pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvt_roundsd_u32&expand=1364) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d, rounding: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d) -> u32 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2usi(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2usi(a, ROUNDING); transmute(r) } @@ -37251,16 +36890,12 @@ pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundi32_ss&expand=1312) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2ss(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss(a, b, ROUNDING); transmute(r) } @@ -37276,16 +36911,12 @@ pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsi32_ss&expand=1366) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtsi2ss(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss(a, b, ROUNDING); transmute(r) } @@ -37300,16 +36931,12 @@ pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundu32_ss&expand=1378) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtusi2ss(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtusi2ss(a, b, ROUNDING); transmute(r) } @@ -44343,10 +43970,10 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); assert_eq_m512i(r, e); - let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44357,14 +43984,14 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = - _mm512_mask_cvt_roundps_epi32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512i(r, src); - let r = _mm512_mask_cvt_roundps_epi32( + let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44375,12 +44002,13 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvt_roundps_epi32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = 
_mm512_maskz_cvt_roundps_epi32( + let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44391,10 +44019,10 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16); assert_eq_m512i(r, e); - let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44405,14 +44033,14 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = - _mm512_mask_cvt_roundps_epu32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512i(r, src); - let r = _mm512_mask_cvt_roundps_epu32( + let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44423,12 +44051,13 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvt_roundps_epu32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = 
_mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_cvt_roundps_epu32( + let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -44437,7 +44066,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundepi32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); let e = _mm512_setr_ps( 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16., ); @@ -44448,14 +44077,14 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundepi32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); let src = _mm512_set1_ps(0.); - let r = - _mm512_mask_cvt_roundepi32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512(r, src); - let r = _mm512_mask_cvt_roundepi32_ps( + let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_ps( 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0., @@ -44466,12 +44095,13 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundepi32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_maskz_cvt_roundepi32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = 
_mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_cvt_roundepi32_ps( + let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); let e = _mm512_setr_ps( 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0., @@ -44482,7 +44112,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundepu32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); #[rustfmt::skip] let e = _mm512_setr_ps( 0., 4294967300., 2., 4294967300., @@ -44497,14 +44127,14 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundepu32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); let src = _mm512_set1_ps(0.); - let r = - _mm512_mask_cvt_roundepu32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + src, 0, a, + ); assert_eq_m512(r, src); - let r = _mm512_mask_cvt_roundepu32_ps( + let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( src, 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); #[rustfmt::skip] let e = _mm512_setr_ps( @@ -44519,12 +44149,13 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundepu32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); - let r = _mm512_maskz_cvt_roundepu32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, + ); 
assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_cvt_roundepu32_ps( + let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b00000000_11111111, a, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); #[rustfmt::skip] let e = _mm512_setr_ps( @@ -52671,7 +52302,7 @@ mod tests { unsafe fn test_mm_add_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 60.); assert_eq_m128(r, e); } @@ -52681,15 +52312,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_add_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_add_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 60.); assert_eq_m128(r, e); @@ -52699,10 +52326,11 @@ mod tests { unsafe fn test_mm_maskz_add_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_maskz_add_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_add_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 60.); assert_eq_m128(r, e); } @@ -52711,7 +52339,7 @@ mod 
tests { unsafe fn test_mm_add_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_add_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 6.); assert_eq_m128d(r, e); } @@ -52721,15 +52349,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_add_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_add_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 6.); assert_eq_m128d(r, e); @@ -52739,10 +52363,11 @@ mod tests { unsafe fn test_mm_maskz_add_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_add_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_add_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., 6.); assert_eq_m128d(r, e); } @@ -52751,7 +52376,7 @@ mod tests { unsafe fn test_mm_sub_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., -20.); assert_eq_m128(r, e); } @@ -52761,15 +52386,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 
2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_sub_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_sub_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., -20.); assert_eq_m128(r, e); @@ -52779,10 +52400,11 @@ mod tests { unsafe fn test_mm_maskz_sub_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_maskz_sub_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_sub_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., -20.); assert_eq_m128(r, e); } @@ -52791,7 +52413,7 @@ mod tests { unsafe fn test_mm_sub_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_sub_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., -2.); assert_eq_m128d(r, e); } @@ -52801,15 +52423,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_sub_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_sub_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | 
_MM_FROUND_NO_EXC, + let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., -2.); assert_eq_m128d(r, e); @@ -52819,10 +52437,11 @@ mod tests { unsafe fn test_mm_maskz_sub_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_sub_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_sub_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., -2.); assert_eq_m128d(r, e); } @@ -52831,7 +52450,7 @@ mod tests { unsafe fn test_mm_mul_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mul_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 800.); assert_eq_m128(r, e); } @@ -52841,15 +52460,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_mul_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_mul_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 800.); assert_eq_m128(r, e); @@ -52859,10 +52474,11 @@ mod tests { unsafe fn test_mm_maskz_mul_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = 
_mm_maskz_mul_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_mul_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 800.); assert_eq_m128(r, e); } @@ -52871,7 +52487,7 @@ mod tests { unsafe fn test_mm_mul_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mul_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); } @@ -52881,15 +52497,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_mul_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_mul_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); @@ -52899,10 +52511,11 @@ mod tests { unsafe fn test_mm_maskz_mul_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_mul_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_mul_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = 
_mm_set_pd(1., 8.); assert_eq_m128d(r, e); } @@ -52911,7 +52524,7 @@ mod tests { unsafe fn test_mm_div_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_div_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 0.5); assert_eq_m128(r, e); } @@ -52921,15 +52534,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_mask_div_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_div_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 0.5); assert_eq_m128(r, e); @@ -52939,10 +52548,11 @@ mod tests { unsafe fn test_mm_maskz_div_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 40.); - let r = _mm_maskz_div_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_div_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 0.5); assert_eq_m128(r, e); } @@ -52951,7 +52561,7 @@ mod tests { unsafe fn test_mm_div_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 
b); let e = _mm_set_pd(1., 0.5); assert_eq_m128d(r, e); } @@ -52961,15 +52571,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_div_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_div_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 0.5); assert_eq_m128d(r, e); @@ -52979,10 +52585,11 @@ mod tests { unsafe fn test_mm_maskz_div_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_div_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_div_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., 0.5); assert_eq_m128d(r, e); } @@ -53123,7 +52730,7 @@ mod tests { unsafe fn test_mm_sqrt_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 4.); - let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 2., 10., 2.); assert_eq_m128(r, e); } @@ -53133,15 +52740,11 @@ mod tests { let src = _mm_set_ps(10., 11., 100., 110.); let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 4.); - let r = _mm_mask_sqrt_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = 
_mm_set_ps(1., 2., 10., 110.); assert_eq_m128(r, e); - let r = _mm_mask_sqrt_round_ss( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_ps(1., 2., 10., 2.); assert_eq_m128(r, e); @@ -53151,10 +52754,11 @@ mod tests { unsafe fn test_mm_maskz_sqrt_round_ss() { let a = _mm_set_ps(1., 2., 10., 20.); let b = _mm_set_ps(3., 4., 30., 4.); - let r = _mm_maskz_sqrt_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 2., 10., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_sqrt_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_ps(1., 2., 10., 2.); assert_eq_m128(r, e); } @@ -53163,7 +52767,7 @@ mod tests { unsafe fn test_mm_sqrt_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_sqrt_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 2.); assert_eq_m128d(r, e); } @@ -53173,15 +52777,11 @@ mod tests { let src = _mm_set_pd(10., 11.); let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_mask_sqrt_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b); let e = _mm_set_pd(1., 11.); assert_eq_m128d(r, e); - let r = _mm_mask_sqrt_round_sd( - src, - 0b11111111, - a, - b, - _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, + let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + src, 0b11111111, a, b, ); let e = _mm_set_pd(1., 2.); assert_eq_m128d(r, e); @@ -53191,10 +52791,11 @@ mod tests { unsafe fn 
test_mm_maskz_sqrt_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_pd(3., 4.); - let r = _mm_maskz_sqrt_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_sqrt_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b); let e = _mm_set_pd(1., 2.); assert_eq_m128d(r, e); } @@ -53473,7 +53074,7 @@ mod tests { unsafe fn test_mm_scalef_round_ss() { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(3.); - let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(1., 1., 1., 8.); assert_eq_m128(r, e); } @@ -53482,15 +53083,13 @@ mod tests { unsafe fn test_mm_mask_scalef_round_ss() { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(3.); - let r = _mm_mask_scalef_round_ss(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); - let r = _mm_mask_scalef_round_ss( - a, - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, ); let e = _mm_set_ps(1., 1., 1., 8.); assert_eq_m128(r, e); @@ -53500,14 +53099,12 @@ mod tests { unsafe fn test_mm_maskz_scalef_round_ss() { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(3.); - let r = _mm_maskz_scalef_round_ss(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = 
_mm_maskz_scalef_round_ss( - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, ); let e = _mm_set_ps(1., 1., 1., 8.); assert_eq_m128(r, e); @@ -53517,7 +53114,7 @@ mod tests { unsafe fn test_mm_scalef_round_sd() { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(3.); - let r = _mm_scalef_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); } @@ -53526,15 +53123,13 @@ mod tests { unsafe fn test_mm_mask_scalef_round_sd() { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(3.); - let r = _mm_mask_scalef_round_sd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); - let r = _mm_mask_scalef_round_sd( - a, - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, ); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); @@ -53544,14 +53139,12 @@ mod tests { unsafe fn test_mm_maskz_scalef_round_sd() { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(3.); - let r = _mm_maskz_scalef_round_sd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = + _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_scalef_round_sd( - 0b11111111, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, ); let e = _mm_set_pd(1., 8.); assert_eq_m128d(r, e); @@ -53562,7 +53155,7 @@ mod tests { let a = _mm_set1_ps(1.); 
let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., 5.); assert_eq_m128(r, e); } @@ -53572,14 +53165,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fmadd_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., 5.); assert_eq_m128(r, e); @@ -53590,15 +53181,13 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_maskz_fmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fmadd_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., 5.); assert_eq_m128(r, e); @@ -53609,14 +53198,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask3_fmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fmadd_round_ss( - a, - b, - c, - 0b11111111, - 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., 5.); assert_eq_m128(r, e); @@ -53627,7 +53214,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., 5.); assert_eq_m128d(r, e); } @@ -53637,14 +53224,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fmadd_round_sd( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., 5.); assert_eq_m128d(r, e); @@ -53655,15 +53240,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_maskz_fmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fmadd_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., 5.); assert_eq_m128d(r, e); @@ -53674,14 +53257,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask3_fmadd_round_sd(a, b, c, 0, 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fmadd_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_pd(3., 5.); assert_eq_m128d(r, e); @@ -53692,7 +53273,7 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., -1.); assert_eq_m128(r, e); } @@ -53702,14 +53283,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fmsub_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., -1.); assert_eq_m128(r, e); @@ -53720,15 +53299,13 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_maskz_fmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fmsub_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., -1.); assert_eq_m128(r, e); @@ -53739,14 +53316,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask3_fmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fmsub_round_ss( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., -1.); assert_eq_m128(r, e); @@ -53757,7 +53332,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., -1.); assert_eq_m128d(r, e); } @@ -53767,14 +53342,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fmsub_round_sd( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., -1.); assert_eq_m128d(r, e); @@ -53785,15 +53358,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_maskz_fmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, 
c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fmsub_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., -1.); assert_eq_m128d(r, e); @@ -53804,14 +53375,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask3_fmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fmsub_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_pd(3., -1.); assert_eq_m128d(r, e); @@ -53822,7 +53391,7 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); } @@ -53832,14 +53401,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fnmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fnmadd_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); @@ -53850,16 +53417,13 @@ mod tests { let a = _mm_set1_ps(1.); 
let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_maskz_fnmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fnmadd_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., 1.); assert_eq_m128(r, e); @@ -53870,15 +53434,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_mask3_fnmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fnmadd_round_ss( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., 1.); assert_eq_m128(r, e); @@ -53889,7 +53450,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fnmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); } @@ -53899,14 +53460,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fnmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fnmadd_round_sd( - a, - 0b11111111, - b, - c, - 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); @@ -53917,16 +53476,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_maskz_fnmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fnmadd_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., 1.); assert_eq_m128d(r, e); @@ -53937,15 +53493,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_mask3_fnmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fnmadd_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_pd(3., 1.); assert_eq_m128d(r, e); @@ -53956,7 +53509,7 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_ps(1., 1., 1., -5.); assert_eq_m128(r, e); } @@ -53966,14 +53519,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = _mm_mask_fnmsub_round_ss(a, 0, b, 
c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128(r, a); - let r = _mm_mask_fnmsub_round_ss( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_ps(1., 1., 1., -5.); assert_eq_m128(r, e); @@ -53984,16 +53535,13 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_maskz_fnmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_ps(1., 1., 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_fnmsub_round_ss( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_ps(1., 1., 1., -5.); assert_eq_m128(r, e); @@ -54004,15 +53552,12 @@ mod tests { let a = _mm_set1_ps(1.); let b = _mm_set1_ps(2.); let c = _mm_set1_ps(3.); - let r = - _mm_mask3_fnmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128(r, c); - let r = _mm_mask3_fnmsub_round_ss( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = _mm_set_ps(3., 3., 3., -5.); assert_eq_m128(r, e); @@ -54023,7 +53568,7 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_fnmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = 
_mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); let e = _mm_set_pd(1., -5.); assert_eq_m128d(r, e); } @@ -54033,14 +53578,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = _mm_mask_fnmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); assert_eq_m128d(r, a); - let r = _mm_mask_fnmsub_round_sd( - a, - 0b11111111, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11111111, b, c, ); let e = _mm_set_pd(1., -5.); assert_eq_m128d(r, e); @@ -54051,16 +53594,13 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_maskz_fnmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); let e = _mm_set_pd(1., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_fnmsub_round_sd( - 0b11111111, - a, - b, - c, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, c, ); let e = _mm_set_pd(1., -5.); assert_eq_m128d(r, e); @@ -54071,15 +53611,12 @@ mod tests { let a = _mm_set1_pd(1.); let b = _mm_set1_pd(2.); let c = _mm_set1_pd(3.); - let r = - _mm_mask3_fnmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); assert_eq_m128d(r, c); - let r = _mm_mask3_fnmsub_round_sd( - a, - b, - c, - 0b11111111, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b11111111, ); let e = 
_mm_set_pd(3., -5.); assert_eq_m128d(r, e); @@ -54299,7 +53836,7 @@ mod tests { unsafe fn test_mm_cvt_roundsd_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b = _mm_set_pd(6., -7.5); - let r = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., -7.5); assert_eq_m128(r, e); } @@ -54308,10 +53845,11 @@ mod tests { unsafe fn test_mm_mask_cvt_roundsd_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b = _mm_set_pd(6., -7.5); - let r = _mm_mask_cvt_roundsd_ss(a, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b); assert_eq_m128(r, a); - let r = - _mm_mask_cvt_roundsd_ss(a, 0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + a, 0b11111111, a, b, + ); let e = _mm_set_ps(0., -0.5, 1., -7.5); assert_eq_m128(r, e); } @@ -54320,10 +53858,12 @@ mod tests { unsafe fn test_mm_maskz_cvt_roundsd_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b = _mm_set_pd(6., -7.5); - let r = _mm_maskz_cvt_roundsd_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); let e = _mm_set_ps(0., -0.5, 1., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_cvt_roundsd_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>( + 0b11111111, a, b, + ); let e = _mm_set_ps(0., -0.5, 1., -7.5); assert_eq_m128(r, e); } @@ -54331,7 +53871,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_si32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 
= -1; assert_eq!(r, e); } @@ -54339,7 +53879,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_i32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 = -1; assert_eq!(r, e); } @@ -54347,7 +53887,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_u32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } @@ -54371,7 +53911,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_si32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 = -1; assert_eq!(r, e); } @@ -54379,7 +53919,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_i32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i32 = -1; assert_eq!(r, e); } @@ -54387,7 +53927,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_u32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } @@ -54412,7 +53952,7 @@ mod tests { unsafe fn test_mm_cvt_roundi32_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i32 = 9; - let r = _mm_cvt_roundi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = 
_mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -54421,7 +53961,7 @@ mod tests { unsafe fn test_mm_cvt_roundsi32_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i32 = 9; - let r = _mm_cvt_roundsi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -54430,7 +53970,7 @@ mod tests { unsafe fn test_mm_cvt_roundu32_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: u32 = 9; - let r = _mm_cvt_roundu32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index caaf3e6d73..ae6202bc73 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -6288,7 +6288,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundpd_ps() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundpd_ps(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); assert_eq_m256(r, e); } @@ -6297,9 +6297,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundpd_ps() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_ps(0.); - let r = _mm512_mask_cvt_roundpd_ps(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m256(r, src); - let r = _mm512_mask_cvt_roundpd_ps(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m256(r, e); } @@ -6307,9 +6307,9 @@ mod tests 
{ #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundpd_ps() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundpd_ps(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m256(r, _mm256_setzero_ps()); - let r = _mm512_maskz_cvt_roundpd_ps(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m256(r, e); } @@ -6317,7 +6317,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundpd_epi32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); assert_eq_m256i(r, e); } @@ -6326,9 +6326,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvt_roundpd_epi32(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvt_roundpd_epi32(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6336,9 +6336,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundpd_epi32(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = 
_mm512_maskz_cvt_roundpd_epi32(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6346,7 +6346,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundpd_epu32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); assert_eq_m256i(r, e); } @@ -6355,9 +6355,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvt_roundpd_epu32(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvt_roundpd_epu32(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6365,9 +6365,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundpd_epu32(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvt_roundpd_epu32(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } From 76acb956f5677458533530ae9db84dacda26bf8a Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 15:46:38 +0000 Subject: [PATCH 
019/123] cvt_roundps_pd; cvt_roundps_ph; cvt_roundph_ps; cvtps_ph; cvtt_roundps,pd_epi32,epu32; mm_max,min_round_ss,sd; mm_getexp_ss,sd; mm_cvt_roundss_sd; cvt_roundss_si32,i32,u32; mm_cvtt_roundsd_si32,i32,u32 --- crates/core_arch/src/x86/avx512f.rs | 905 ++++++++++--------------- crates/core_arch/src/x86_64/avx512f.rs | 30 +- 2 files changed, 375 insertions(+), 560 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7911157eb2..bcd826d700 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -13533,17 +13533,13 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_pd&expand=1347) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_pd(a: __m256) -> __m512d { + static_assert_sae!(SAE); let a = a.as_f32x8(); let zero = _mm512_setzero_pd().as_f64x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2pd(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2pd(a, zero, 0b11111111, SAE); transmute(r) } @@ -13553,22 +13549,17 @@ pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epi32&expand=1336) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_pd( +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_pd( src: __m512d, k: __mmask8, a: __m256, - sae: i32, ) -> __m512d { + static_assert_sae!(SAE); let a = a.as_f32x8(); let src = src.as_f64x8(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2pd(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2pd(a, src, k, SAE); transmute(r) } @@ -13578,17 +13569,13 @@ pub unsafe fn _mm512_mask_cvt_roundps_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epi32&expand=1337) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256) -> __m512d { + static_assert_sae!(SAE); let a = a.as_f32x8(); let zero = _mm512_setzero_pd().as_f64x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2pd(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2pd(a, zero, k, SAE); transmute(r) } @@ -13956,17 +13943,13 @@ pub unsafe fn _mm512_maskz_cvt_roundepu32_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_ph&expand=1354) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundps_ph(a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, 0b11111111_11111111) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, 0b11111111_11111111); transmute(r) } @@ -13976,22 +13959,17 @@ pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_ph&expand=1355) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundps_ph( +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundps_ph( src: __m256i, k: __mmask16, a: __m512, - sae: i32, ) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_i16x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, src, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, src, k); transmute(r) } @@ -14001,17 +13979,13 @@ pub unsafe fn _mm512_mask_cvt_roundps_ph( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_ph&expand=1356) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, k); transmute(r) } @@ -14126,17 +14100,13 @@ pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_ph&expand=1778) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtps_ph(a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, 0b11111111_11111111) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, 0b11111111_11111111); transmute(r) } @@ -14146,17 +14116,17 @@ pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtps_ph&expand=1779) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtps_ph( + src: __m256i, + k: __mmask16, + a: __m512, +) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_i16x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, src, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, src, k); transmute(r) } @@ -14166,17 +14136,13 @@ pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtps_ph&expand=1780) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm256_setzero_si256().as_i16x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph(a, $imm4, zero, k) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtps2ph(a, SAE, zero, k); transmute(r) } @@ -14286,17 +14252,13 @@ pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_ps&expand=1332) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i) -> __m512 { + static_assert_sae!(SAE); let a = a.as_i16x16(); let zero = _mm512_setzero_ps().as_f32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtph2ps(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtph2ps(a, zero, 0b11111111_11111111, SAE); transmute(r) } @@ -14306,22 +14268,17 @@ pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_ps&expand=1333) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvt_roundph_ps( +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvt_roundph_ps( src: __m512, k: __mmask16, a: __m256i, - sae: i32, ) -> __m512 { + static_assert_sae!(SAE); let a = a.as_i16x16(); let src = src.as_f32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtph2ps(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtph2ps(a, src, k, SAE); transmute(r) } @@ -14331,17 +14288,13 @@ pub unsafe fn _mm512_mask_cvt_roundph_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_ps&expand=1334) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i, sae: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i) -> __m512 { + static_assert_sae!(SAE); let a = a.as_i16x16(); let zero = _mm512_setzero_ps().as_f32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvtph2ps(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtph2ps(a, zero, k, SAE); transmute(r) } @@ -14442,17 +14395,13 @@ pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundps_epi32&expand=1916) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttps2dq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2dq(a, zero, 0b11111111_11111111, SAE); transmute(r) } @@ -14462,22 +14411,17 @@ pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundps_epi32&expand=1917) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundps_epi32( +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundps_epi32( src: __m512i, k: __mmask16, a: __m512, - sae: i32, ) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_i32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvttps2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2dq(a, src, k, SAE); transmute(r) } @@ -14487,17 +14431,13 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_i32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttps2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2dq(a, zero, k, SAE); transmute(r) } @@ -14507,17 +14447,13 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundps_epu32&expand=1922) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvttps2udq(a, zero, 0b11111111_11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2udq(a, zero, 0b11111111_11111111, SAE); transmute(r) } @@ -14527,22 +14463,17 @@ pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundps_epu32&expand=1923) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundps_epu32( +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundps_epu32( src: __m512i, k: __mmask16, a: __m512, - sae: i32, ) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let src = src.as_u32x16(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttps2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2udq(a, src, k, SAE); transmute(r) } @@ -14552,17 +14483,13 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { + static_assert_sae!(SAE); let a = a.as_f32x16(); let zero = _mm512_setzero_si512().as_u32x16(); - macro_rules! call { - ($imm4:expr) => { - vcvttps2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttps2udq(a, zero, k, SAE); transmute(r) } @@ -14572,17 +14499,13 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundpd_epi32&expand=1904) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttpd2dq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2dq(a, zero, 0b11111111, SAE); transmute(r) } @@ -14592,22 +14515,17 @@ pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( src: __m256i, k: __mmask8, a: __m512d, - sae: i32, ) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let src = src.as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvttpd2dq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2dq(a, src, k, SAE); transmute(r) } @@ -14617,17 +14535,13 @@ pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttpd2dq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2dq(a, zero, k, SAE); transmute(r) } @@ -14637,17 +14551,13 @@ pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundpd_epu32&expand=1910) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvttpd2udq(a, zero, 0b11111111, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2udq(a, zero, 0b11111111, SAE); transmute(r) } @@ -14657,22 +14567,17 @@ pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_cvtt_roundpd_epu32( +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_cvtt_roundpd_epu32( src: __m256i, k: __mmask8, a: __m512d, - sae: i32, ) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let src = src.as_i32x8(); - macro_rules! 
call { - ($imm4:expr) => { - vcvttpd2udq(a, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2udq(a, src, k, SAE); transmute(r) } @@ -14896,17 +14801,13 @@ pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d, sae: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + static_assert_sae!(SAE); let a = a.as_f64x8(); let zero = _mm256_setzero_si256().as_i32x8(); - macro_rules! call { - ($imm4:expr) => { - vcvttpd2udq(a, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvttpd2udq(a, zero, k, SAE); transmute(r) } @@ -34064,18 +33965,15 @@ pub unsafe fn _mm_maskz_div_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_ss&expand=3668) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxss, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxss(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34084,24 +33982,20 @@ pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_ss&expand=3672) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxss, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_max_round_ss( +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_max_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - sae: i32, ) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxss(a, b, src, k, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34110,18 +34004,15 @@ pub unsafe fn _mm_mask_max_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_round_ss&expand=3667) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxss, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxss(a, b, zero, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -34130,18 +34021,15 @@ pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_sd&expand=3665) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxsd(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34150,24 +34038,20 @@ pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_round_sd&expand=3663) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_max_round_sd( +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_max_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxsd(a, b, src, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34176,18 +34060,19 @@ pub unsafe fn _mm_mask_max_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_sd&expand=3670) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_max_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vmaxsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vmaxsd(a, b, zero, k, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34196,18 +34081,15 @@ pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_ss&expand=3782) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminss, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vminss(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminss(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34216,24 +34098,20 @@ pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_Ss&expand=3780) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminss, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_min_round_ss( +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_min_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - sae: i32, ) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vminss(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminss(a, b, src, k, SAE); + transmute(r) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34242,18 +34120,15 @@ pub unsafe fn _mm_mask_min_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_ss&expand=3781) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminss, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vminss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vminss(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminss(a, b, zero, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst , and copy the upper element from a to the upper element of dst.\ @@ -34262,18 +34137,15 @@ pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_sd&expand=3779) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminsd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vminsd(a, b, zero, 0b1, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminsd(a, b, zero, 0b1, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34282,24 +34154,20 @@ pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_sd&expand=3777) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminsd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_min_round_sd( +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_min_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vminsd(a, b, src, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminsd(a, b, src, k, SAE); + transmute(r) } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -34308,18 +34176,19 @@ pub unsafe fn _mm_mask_min_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_Sd&expand=3778) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vminsd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vminsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_min_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vminsd(a, b, zero, k, $imm4) - }; - } - transmute(constify_imm4_sae!(sae, call)) + let r = vminsd(a, b, zero, k, SAE); + transmute(r) } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -34484,18 +34353,14 @@ pub unsafe fn _mm_maskz_sqrt_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_ss&expand=2856) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vgetexpss(a, b, zero, 0b1, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpss(a, b, zero, 0b1, SAE); transmute(r) } @@ -34505,24 +34370,19 @@ pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_ss&expand=2857) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_getexp_round_ss( +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_getexp_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, - sae: i32, ) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let src = src.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vgetexpss(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpss(a, b, src, k, SAE); transmute(r) } @@ -34532,18 +34392,18 @@ pub unsafe fn _mm_mask_getexp_round_ss( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_ss&expand=2858) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_getexp_round_ss( + k: __mmask8, + a: __m128, + b: __m128, +) -> __m128 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vgetexpss(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpss(a, b, zero, k, SAE); transmute(r) } @@ -34553,18 +34413,14 @@ pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_sd&expand=2853) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vgetexpsd(a, b, zero, 0b1, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpsd(a, b, zero, 0b1, SAE); transmute(r) } @@ -34574,24 +34430,19 @@ pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_sd&expand=2854) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_getexp_round_sd( +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_getexp_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let src = src.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vgetexpsd(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpsd(a, b, src, k, SAE); transmute(r) } @@ -34601,18 +34452,18 @@ pub unsafe fn _mm_mask_getexp_round_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_sd&expand=2855) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_getexp_round_sd( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vgetexpsd(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vgetexpsd(a, b, zero, k, SAE); transmute(r) } @@ -36577,21 +36428,14 @@ pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_sd&expand=1371) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d { - macro_rules! call { - ($imm4:expr) => { - vcvtss2sd( - a.as_f64x2(), - b.as_f32x4(), - _mm_setzero_pd().as_f64x2(), - 0b11111111, - $imm4, - ) - }; - } - let r = constify_imm4_sae!(sae, call); +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128) -> __m128d { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let zero = _mm_setzero_pd().as_f64x2(); + let r = vcvtss2sd(a, b, zero, 0b11111111, SAE); transmute(r) } @@ -36601,24 +36445,19 @@ pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundss_sd&expand=1372) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_cvt_roundss_sd( +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_cvt_roundss_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128, - sae: i32, ) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f32x4(); let src = src.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2sd(a, b, src, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2sd(a, b, src, k, SAE); transmute(r) } @@ -36628,18 +36467,18 @@ pub unsafe fn _mm_mask_cvt_roundss_sd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundss_sd&expand=1373) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_cvt_roundss_sd( + k: __mmask8, + a: __m128d, + b: __m128, +) -> __m128d { + static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f32x4(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2sd(a, b, zero, k, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2sd(a, b, zero, k, SAE); transmute(r) } @@ -36970,16 +36809,12 @@ pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_Si32&expand=1936) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { + static_assert_sae!(SAE); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si(a, SAE); transmute(r) } @@ -36989,16 +36824,12 @@ pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_i32&expand=1934) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { + static_assert_sae!(SAE); let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si(a, SAE); transmute(r) } @@ -37008,16 +36839,12 @@ pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_u32&expand=1938) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { + static_assert_sae!(SAE); let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2usi(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2usi(a, SAE); transmute(r) } @@ -37047,16 +36874,12 @@ pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si32&expand=1930) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { + static_assert_sae!(SAE); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si(a, SAE); transmute(r) } @@ -37066,16 +36889,12 @@ pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i32&expand=1928) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { + static_assert_sae!(SAE); let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si(a, SAE); transmute(r) } @@ -37085,16 +36904,12 @@ pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_u32&expand=1932) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { + static_assert_sae!(SAE); let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2usi(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2usi(a, SAE); transmute(r) } @@ -44170,7 +43985,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundps_ph() { let a = _mm512_set1_ps(1.); - let r = _mm512_cvt_roundps_ph(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi64x( 4323521613979991040, 4323521613979991040, @@ -44184,9 +43999,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundps_ph() { let a = _mm512_set1_ps(1.); let src = _mm256_set1_epi16(0); - let r = _mm512_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvt_roundps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44194,9 +44009,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundps_ph() { let a = _mm512_set1_ps(1.); - let r = 
_mm512_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvt_roundps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44246,7 +44061,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtps_ph() { let a = _mm512_set1_ps(1.); - let r = _mm512_cvtps_ph(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi64x( 4323521613979991040, 4323521613979991040, @@ -44260,9 +44075,9 @@ mod tests { unsafe fn test_mm512_mask_cvtps_ph() { let a = _mm512_set1_ps(1.); let src = _mm256_set1_epi16(0); - let r = _mm512_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvtps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44270,9 +44085,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtps_ph() { let a = _mm512_set1_ps(1.); - let r = _mm512_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvtps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0); assert_eq_m256i(r, e); } @@ -44327,7 +44142,7 @@ mod tests { 4323521613979991040, 4323521613979991040, ); - let r = _mm512_cvt_roundph_ps(a, _MM_FROUND_NO_EXC); + let r = 
_mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a); let e = _mm512_set1_ps(1.); assert_eq_m512(r, e); } @@ -44341,9 +44156,9 @@ mod tests { 4323521613979991040, ); let src = _mm512_set1_ps(0.); - let r = _mm512_mask_cvt_roundph_ps(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m512(r, src); - let r = _mm512_mask_cvt_roundph_ps(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm512_setr_ps( 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., ); @@ -44358,9 +44173,9 @@ mod tests { 4323521613979991040, 4323521613979991040, ); - let r = _mm512_maskz_cvt_roundph_ps(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_cvt_roundph_ps(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm512_setr_ps( 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., ); @@ -44462,7 +44277,7 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a); let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44473,9 +44288,9 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = _mm512_mask_cvtt_roundps_epi32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m512i(r, src); - let r = _mm512_mask_cvtt_roundps_epi32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 
0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -44485,9 +44300,9 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvtt_roundps_epi32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_cvtt_roundps_epi32(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -44497,7 +44312,7 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_cvtt_roundps_epu32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m512i(r, e); } @@ -44508,9 +44323,9 @@ mod tests { 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); let src = _mm512_set1_epi32(0); - let r = _mm512_mask_cvtt_roundps_epu32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m512i(r, src); - let r = _mm512_mask_cvtt_roundps_epu32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -44520,9 +44335,9 @@ mod tests { let a = _mm512_setr_ps( 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5, ); - let r = _mm512_maskz_cvtt_roundps_epu32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, 
a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_cvtt_roundps_epu32(0b00000000_11111111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a); let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -52598,7 +52413,7 @@ mod tests { unsafe fn test_mm_max_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_ps(0., 1., 2., 7.); assert_eq_m128(r, e); } @@ -52607,10 +52422,10 @@ mod tests { unsafe fn test_mm_mask_max_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_mask_max_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); - let r = _mm_mask_max_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 7.); assert_eq_m128(r, e); } @@ -52619,10 +52434,10 @@ mod tests { unsafe fn test_mm_maskz_max_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_maskz_max_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_ps(0., 1., 2., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_max_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 7.); assert_eq_m128(r, e); } @@ -52631,7 +52446,7 @@ mod tests { unsafe fn test_mm_max_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_max_round_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = 
_mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(0., 3.); assert_eq_m128d(r, e); } @@ -52640,10 +52455,10 @@ mod tests { unsafe fn test_mm_mask_max_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_mask_max_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); - let r = _mm_mask_max_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(0., 3.); assert_eq_m128d(r, e); } @@ -52652,10 +52467,10 @@ mod tests { unsafe fn test_mm_maskz_max_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_maskz_max_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(0., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_max_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(0., 3.); assert_eq_m128d(r, e); } @@ -52664,7 +52479,7 @@ mod tests { unsafe fn test_mm_min_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_min_round_ss(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); } @@ -52673,10 +52488,10 @@ mod tests { unsafe fn test_mm_mask_min_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_mask_min_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); - let r = _mm_mask_min_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = 
_mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); } @@ -52685,10 +52500,10 @@ mod tests { unsafe fn test_mm_maskz_min_round_ss() { let a = _mm_set_ps(0., 1., 2., 3.); let b = _mm_set_ps(4., 5., 6., 7.); - let r = _mm_maskz_min_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_ps(0., 1., 2., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_min_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_ps(0., 1., 2., 3.); assert_eq_m128(r, e); } @@ -52697,7 +52512,7 @@ mod tests { unsafe fn test_mm_min_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_min_round_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); } @@ -52706,10 +52521,10 @@ mod tests { unsafe fn test_mm_mask_min_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_mask_min_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); - let r = _mm_mask_min_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); } @@ -52718,10 +52533,10 @@ mod tests { unsafe fn test_mm_maskz_min_round_sd() { let a = _mm_set_pd(0., 1.); let b = _mm_set_pd(2., 3.); - let r = _mm_maskz_min_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(0., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_min_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = 
_mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(0., 1.); assert_eq_m128d(r, e); } @@ -52804,7 +52619,7 @@ mod tests { unsafe fn test_mm_getexp_round_ss() { let a = _mm_set1_ps(2.); let b = _mm_set1_ps(3.); - let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_ps(2., 2., 2., 1.); assert_eq_m128(r, e); } @@ -52813,10 +52628,10 @@ mod tests { unsafe fn test_mm_mask_getexp_round_ss() { let a = _mm_set1_ps(2.); let b = _mm_set1_ps(3.); - let r = _mm_mask_getexp_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_ps(2., 2., 2., 2.); assert_eq_m128(r, e); - let r = _mm_mask_getexp_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_ps(2., 2., 2., 1.); assert_eq_m128(r, e); } @@ -52825,10 +52640,10 @@ mod tests { unsafe fn test_mm_maskz_getexp_round_ss() { let a = _mm_set1_ps(2.); let b = _mm_set1_ps(3.); - let r = _mm_maskz_getexp_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_ps(2., 2., 2., 0.); assert_eq_m128(r, e); - let r = _mm_maskz_getexp_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_ps(2., 2., 2., 1.); assert_eq_m128(r, e); } @@ -52837,7 +52652,7 @@ mod tests { unsafe fn test_mm_getexp_round_sd() { let a = _mm_set1_pd(2.); let b = _mm_set1_pd(3.); - let r = _mm_getexp_round_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); } @@ -52846,10 +52661,10 @@ mod tests { unsafe fn test_mm_mask_getexp_round_sd() { let a = _mm_set1_pd(2.); let b = _mm_set1_pd(3.); - 
let r = _mm_mask_getexp_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); let e = _mm_set_pd(2., 2.); assert_eq_m128d(r, e); - let r = _mm_mask_getexp_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); } @@ -52858,10 +52673,10 @@ mod tests { unsafe fn test_mm_maskz_getexp_round_sd() { let a = _mm_set1_pd(2.); let b = _mm_set1_pd(3.); - let r = _mm_maskz_getexp_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(2., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_getexp_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); } @@ -53804,7 +53619,7 @@ mod tests { unsafe fn test_mm_cvt_roundss_sd() { let a = _mm_set_pd(6., -7.5); let b = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_sd(a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); let e = _mm_set_pd(6., -1.5); assert_eq_m128d(r, e); } @@ -53813,9 +53628,9 @@ mod tests { unsafe fn test_mm_mask_cvt_roundss_sd() { let a = _mm_set_pd(6., -7.5); let b = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_mask_cvt_roundss_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); assert_eq_m128d(r, a); - let r = _mm_mask_cvt_roundss_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b); let e = _mm_set_pd(6., -1.5); assert_eq_m128d(r, e); } @@ -53824,10 +53639,10 @@ mod tests { unsafe fn test_mm_maskz_cvt_roundss_sd() { let a = _mm_set_pd(6., -7.5); let b = _mm_set_ps(0., -0.5, 1., -1.5); - let r = 
_mm_maskz_cvt_roundss_sd(0, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); let e = _mm_set_pd(6., 0.); assert_eq_m128d(r, e); - let r = _mm_maskz_cvt_roundss_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION); + let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b); let e = _mm_set_pd(6., -1.5); assert_eq_m128d(r, e); } @@ -53996,7 +53811,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_si32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_si32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_si32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54004,7 +53819,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_i32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_i32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_i32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54012,7 +53827,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_u32() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_u32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_u32::<_MM_FROUND_CUR_DIRECTION>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } @@ -54036,7 +53851,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_si32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_si32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54044,7 +53859,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_i32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_i32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_CUR_DIRECTION>(a); let e: i32 = -2; assert_eq!(r, e); } @@ -54052,7 +53867,7 @@ mod tests { #[simd_test(enable = 
"avx512f")] unsafe fn test_mm_cvtt_roundsd_u32() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_u32(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_CUR_DIRECTION>(a); let e: u32 = u32::MAX; assert_eq!(r, e); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index ae6202bc73..2db8a430d4 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -5090,7 +5090,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvtt_roundpd_epi32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7); assert_eq_m256i(r, e); } @@ -5099,9 +5099,9 @@ mod tests { unsafe fn test_mm512_mask_cvtt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvtt_roundpd_epi32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvtt_roundpd_epi32(src, 0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -5109,9 +5109,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvtt_roundpd_epi32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvtt_roundpd_epi32(0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 
0, 0); assert_eq_m256i(r, e); } @@ -5119,7 +5119,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvtt_roundpd_epu32(a, _MM_FROUND_NO_EXC); + let r = _mm512_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(a); let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); assert_eq_m256i(r, e); } @@ -5128,9 +5128,9 @@ mod tests { unsafe fn test_mm512_mask_cvtt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm256_set1_epi32(0); - let r = _mm512_mask_cvtt_roundpd_epu32(src, 0, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0, a); assert_eq_m256i(r, src); - let r = _mm512_mask_cvtt_roundpd_epu32(src, 0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -5138,9 +5138,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtt_roundpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvtt_roundpd_epu32(0, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvtt_roundpd_epu32(0b00001111, a, _MM_FROUND_NO_EXC); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0b00001111, a); let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -6259,7 +6259,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundps_pd() { let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_cvt_roundps_pd(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(a); let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); assert_eq_m512d(r, e); } @@ -6268,9 
+6268,9 @@ mod tests { unsafe fn test_mm512_mask_cvt_roundps_pd() { let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); let src = _mm512_set1_pd(0.); - let r = _mm512_mask_cvt_roundps_pd(src, 0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); assert_eq_m512d(r, src); - let r = _mm512_mask_cvt_roundps_pd(src, 0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m512d(r, e); } @@ -6278,9 +6278,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvt_roundps_pd() { let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvt_roundps_pd(0, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0, a); assert_eq_m512d(r, _mm512_setzero_pd()); - let r = _mm512_maskz_cvt_roundps_pd(0b00001111, a, _MM_FROUND_CUR_DIRECTION); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); assert_eq_m512d(r, e); } From 3bb7aeff4271aefc8fd48f006cb5696a225498d9 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 17:02:33 +0000 Subject: [PATCH 020/123] shuffle_epi32 --- crates/core_arch/src/x86/avx512f.rs | 137 +++++++++------------------- 1 file changed, 44 insertions(+), 93 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index bcd826d700..c50bd73360 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21523,75 +21523,32 @@ pub unsafe fn _mm_mask2_permutex2var_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_epi32&expand=5150) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] 
//should be vpshufd, but generate vpermilps -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i { - let imm8 = (imm8 & 0xFF) as u8; - let a = a.as_i32x16(); - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr, - $i:expr, - $j:expr, - $k:expr, - $l:expr, - $m:expr, - $n:expr, - $o:expr, - $p:expr - ) => { - simd_shuffle16( - a, - a, - [ - $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, - ], - ) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28), - 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29), - 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30), - _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28), - 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29), - 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30), - _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr, $i: expr, $m: expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12), - 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13), - 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14), - _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15), - } - }; - } - let r: i32x16 = match imm8 & 0x3 { - 0 => shuffle1!(0, 4, 8, 12), - 1 => shuffle1!(1, 5, 9, 13), - 2 => shuffle1!(2, 6, 10, 14), - _ => shuffle1!(3, 7, 11, 15), - }; +#[cfg_attr(test, assert_instr(vpermilps, MASK = 9))] //should be vpshufd +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_shuffle_epi32(a: __m512i) -> __m512i { + static_assert_imm8!(MASK); + let r: i32x16 = simd_shuffle16( + a.as_i32x16(), + a.as_i32x16(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ); transmute(r) } @@ -21600,20 +21557,15 @@ pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_epi32&expand=5148) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_shuffle_epi32( +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_shuffle_epi32( src: __m512i, k: __mmask16, a: __m512i, - imm8: _MM_PERM_ENUM, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_epi32::(a); transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) } @@ -21622,15 +21574,14 @@ pub unsafe fn _mm512_mask_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_epi32&expand=5149) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_shuffle_epi32( + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8!(MASK); + let r = _mm512_shuffle_epi32::(a); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) } @@ -47705,7 +47656,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_shuffle_epi32() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm512_shuffle_epi32(a, _MM_PERM_AADD); + let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a); let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m512i(r, e); } @@ -47713,9 +47664,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_shuffle_epi32() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm512_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD); + let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_shuffle_epi32(a, 0b11111111_11111111, a, _MM_PERM_AADD); + let r = 
_mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a); let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m512i(r, e); } @@ -47723,9 +47674,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_shuffle_epi32() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm512_maskz_shuffle_epi32(0, a, _MM_PERM_AADD); + let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_epi32(0b00000000_11111111, a, _MM_PERM_AADD); + let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a); let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } From e4a915d4c5e07683e968d716c34274e45d06761a Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 18:07:40 +0000 Subject: [PATCH 021/123] mm256_shuffle_epi32 --- crates/core_arch/src/x86/avx2.rs | 85 ++++++----------------------- crates/core_arch/src/x86/avx512f.rs | 40 ++++++-------- 2 files changed, 35 insertions(+), 90 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index e1fa8bc9b9..ae15fc6db6 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -2642,74 +2642,25 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32) #[inline] #[target_feature(enable = "avx2")] -#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] -#[rustc_args_required_const(1)] +#[cfg_attr(test, assert_instr(vpermilps, MASK = 9))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i { - // simd_shuffleX requires that its selector parameter be made up of - // constant values, but we can't 
enforce that here. In spirit, we need - // to write a `match` on all possible values of a byte, and for each value, - // hard-code the correct `simd_shuffleX` call using only constants. We - // then hope for LLVM to do the rest. - // - // Of course, that's... awful. So we try to use macros to do it for us. - let imm8 = (imm8 & 0xFF) as u8; - - let a = a.as_i32x8(); - macro_rules! shuffle_done { - ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { - simd_shuffle8( - a, - a, - [ - $x01, - $x23, - $x45, - $x67, - 4 + $x01, - 4 + $x23, - 4 + $x45, - 4 + $x67, - ], - ) - }; - } - macro_rules! shuffle_x67 { - ($x01:expr, $x23:expr, $x45:expr) => { - match (imm8 >> 6) & 0b11 { - 0b00 => shuffle_done!($x01, $x23, $x45, 0), - 0b01 => shuffle_done!($x01, $x23, $x45, 1), - 0b10 => shuffle_done!($x01, $x23, $x45, 2), - _ => shuffle_done!($x01, $x23, $x45, 3), - } - }; - } - macro_rules! shuffle_x45 { - ($x01:expr, $x23:expr) => { - match (imm8 >> 4) & 0b11 { - 0b00 => shuffle_x67!($x01, $x23, 0), - 0b01 => shuffle_x67!($x01, $x23, 1), - 0b10 => shuffle_x67!($x01, $x23, 2), - _ => shuffle_x67!($x01, $x23, 3), - } - }; - } - macro_rules! 
shuffle_x23 { - ($x01:expr) => { - match (imm8 >> 2) & 0b11 { - 0b00 => shuffle_x45!($x01, 0), - 0b01 => shuffle_x45!($x01, 1), - 0b10 => shuffle_x45!($x01, 2), - _ => shuffle_x45!($x01, 3), - } - }; - } - let r: i32x8 = match imm8 & 0b11 { - 0b00 => shuffle_x23!(0), - 0b01 => shuffle_x23!(1), - 0b10 => shuffle_x23!(2), - _ => shuffle_x23!(3), - }; +pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { + static_assert_imm8!(MASK); + let r: i32x8 = simd_shuffle8( + a.as_i32x8(), + a.as_i32x8(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ); transmute(r) } diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index c50bd73360..befa3047c6 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21591,20 +21591,15 @@ pub unsafe fn _mm512_maskz_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_epi32&expand=5145) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_shuffle_epi32( +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_shuffle_epi32( src: __m256i, k: __mmask8, a: __m256i, - imm8: _MM_PERM_ENUM, ) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_epi32::(a); transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) } @@ -21613,15 +21608,14 @@ pub unsafe fn _mm256_mask_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_epi32&expand=5146) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_shuffle_epi32(k: __mmask8, a: __m256i, imm8: _MM_PERM_ENUM) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_epi32(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_shuffle_epi32( + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(MASK); + let r = _mm256_shuffle_epi32::(a); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) } @@ -47684,9 +47678,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_shuffle_epi32() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm256_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD); + let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_shuffle_epi32(a, 0b11111111, a, _MM_PERM_AADD); + let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a); let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m256i(r, e); } @@ -47694,9 +47688,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_shuffle_epi32() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); - let r = _mm256_maskz_shuffle_epi32(0, a, _MM_PERM_AADD); + let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); 
assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shuffle_epi32(0b11111111, a, _MM_PERM_AADD); + let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a); let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9); assert_eq_m256i(r, e); } From 743bd9d67494fb027faccd6cc2e654f90cb37fc0 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 20:48:07 +0000 Subject: [PATCH 022/123] mm512_srai_epi32 --- crates/core_arch/src/x86/avx512f.rs | 100 ++++++++++++---------------- crates/core_arch/src/x86/macros.rs | 16 +++++ 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index befa3047c6..8f3c80e113 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -18338,16 +18338,12 @@ pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5436) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_srai_epi32(a: __m512i) -> __m512i { + static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vpsraid(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpsraid(a, IMM8); transmute(r) } @@ -18356,17 +18352,17 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5434) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_srai_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vpsraid(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + let r = vpsraid(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18374,18 +18370,14 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5435) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { + static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vpsraid(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let r = vpsraid(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21625,20 +21617,15 @@ pub unsafe fn _mm256_maskz_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shuffle_epi32&expand=5142) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_shuffle_epi32( +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_shuffle_epi32( src: __m128i, k: __mmask8, a: __m128i, - imm8: _MM_PERM_ENUM, ) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_shuffle_epi32::<$imm8>(a) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm_shuffle_epi32::(a); transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) } @@ -21647,15 +21634,14 @@ pub unsafe fn _mm_mask_shuffle_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shuffle_epi32&expand=5143) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i, imm8: _MM_PERM_ENUM) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_shuffle_epi32::<$imm8>(a) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_shuffle_epi32( + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(MASK); + let r = _mm_shuffle_epi32::(a); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) } @@ -46913,7 +46899,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); - let r = _mm512_srai_epi32(a, 2); + let r = _mm512_srai_epi32::<2>(a); let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4); assert_eq_m512i(r, e); } @@ -46921,9 +46907,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); - let r = _mm512_mask_srai_epi32(a, 0, a, 2); + let r = _mm512_mask_srai_epi32::<2>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2); + let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a); let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } @@ -46931,9 +46917,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); - let r = _mm512_maskz_srai_epi32(0, a, 2); + let r = _mm512_maskz_srai_epi32::<2>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2); + let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a); let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } @@ -47698,9 +47684,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn 
test_mm_mask_shuffle_epi32() { let a = _mm_set_epi32(1, 4, 5, 8); - let r = _mm_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD); + let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_shuffle_epi32(a, 0b00001111, a, _MM_PERM_AADD); + let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a); let e = _mm_set_epi32(8, 8, 1, 1); assert_eq_m128i(r, e); } @@ -47708,9 +47694,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_shuffle_epi32() { let a = _mm_set_epi32(1, 4, 5, 8); - let r = _mm_maskz_shuffle_epi32(0, a, _MM_PERM_AADD); + let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shuffle_epi32(0b00001111, a, _MM_PERM_AADD); + let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a); let e = _mm_set_epi32(8, 8, 1, 1); assert_eq_m128i(r, e); } diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index e659ac3da8..ecb7085d18 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -32,6 +32,22 @@ macro_rules! static_assert_sae { }; } +// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is +// out of `bits`-bit range. +pub(crate) struct ValidateConstImmU; +impl ValidateConstImmU { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM < (1 << BITS)) as usize); + }; +} + +#[allow(unused)] +macro_rules! static_assert_imm8u { + ($imm:ident) => { + let _ = $crate::core_arch::x86::macros::ValidateConstImmU::<$imm, 8>::VALID; + }; +} + macro_rules! 
constify_imm6 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] From ff63fc18756fa4de519ebeacf7f1ea78a3fcb934 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Sun, 28 Feb 2021 12:11:20 +0700 Subject: [PATCH 023/123] Add `static_assert_imm{1,5,6}` macros --- crates/core_arch/src/macros.rs | 44 ++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index bc43f039b7..87e49fba4b 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -1,46 +1,60 @@ //! Utility macros. -// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is -// out of `bits`-bit range. -pub(crate) struct ValidateConstImm; -impl ValidateConstImm { +// Helper struct used to trigger const eval errors when the const generic immediate value `IMM` is +// out of `[MIN-MAX]` range. +pub(crate) struct ValidateConstImm; +impl ValidateConstImm { pub(crate) const VALID: () = { - let _ = 1 / ((IMM >= 0 && IMM < (1 << BITS)) as usize); + let _ = 1 / ((IMM >= MIN && IMM <= MAX) as usize); }; } -#[allow(unused)] +#[allow(unused_macros)] macro_rules! static_assert_imm1 { ($imm:ident) => { - let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 1>::VALID; + let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 0, { (1 << 1) - 1 }>::VALID; }; } -#[allow(unused)] +#[allow(unused_macros)] macro_rules! static_assert_imm2 { ($imm:ident) => { - let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 2>::VALID; + let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 0, { (1 << 2) - 1 }>::VALID; }; } -#[allow(unused)] +#[allow(unused_macros)] macro_rules! static_assert_imm3 { ($imm:ident) => { - let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 3>::VALID; + let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 0, { (1 << 3) - 1 }>::VALID; }; } -#[allow(unused)] +#[allow(unused_macros)] macro_rules! 
static_assert_imm4 { ($imm:ident) => { - let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 4>::VALID; + let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 0, { (1 << 4) - 1 }>::VALID; }; } -#[allow(unused)] +#[allow(unused_macros)] +macro_rules! static_assert_imm5 { + ($imm:ident) => { + let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 0, { (1 << 5) - 1 }>::VALID; + }; +} + +#[allow(unused_macros)] +macro_rules! static_assert_imm6 { + ($imm:ident) => { + let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 0, { (1 << 6) - 1 }>::VALID; + }; +} + +#[allow(unused_macros)] macro_rules! static_assert_imm8 { ($imm:ident) => { - let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 8>::VALID; + let _ = $crate::core_arch::macros::ValidateConstImm::<$imm, 0, { (1 << 8) - 1 }>::VALID; }; } From 374060ebcd03186f673c013f7ce2f03f89e8e1c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 20:55:40 +0100 Subject: [PATCH 024/123] Convert `_mm_cmp_pd` to const generics and fix imm width --- crates/core_arch/src/x86/avx.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 559d6279b2..315cd18efd 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -792,16 +792,12 @@ pub const _CMP_TRUE_US: i32 = 0x1f; /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_pd) #[inline] #[target_feature(enable = "avx,sse2")] -#[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd -#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmpeqpd, IMM8 = 0))] // TODO Validate vcmppd +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { - macro_rules! 
call { - ($imm8:expr) => { - vcmppd(a, b, $imm8) - }; - } - constify_imm6!(imm8, call) +pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d { + static_assert_imm5!(IMM8); + vcmppd(a, b, IMM8 as i8) } /// Compares packed double-precision (64-bit) floating-point @@ -3635,7 +3631,7 @@ mod tests { unsafe fn test_mm_cmp_pd() { let a = _mm_setr_pd(4., 9.); let b = _mm_setr_pd(4., 3.); - let r = _mm_cmp_pd(a, b, _CMP_GE_OS); + let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b); assert!(get_m128d(r, 0).is_nan()); assert!(get_m128d(r, 1).is_nan()); } From 105064303230d3ea627164d9975bf4fd5a9c20f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 20:59:48 +0100 Subject: [PATCH 025/123] Convert `_mm256_cmp_pd` to const generics and fix imm width --- crates/core_arch/src/x86/avx.rs | 16 ++++++---------- crates/core_arch/src/x86/test.rs | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 315cd18efd..0b520fc6f0 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -807,16 +807,12 @@ pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd -#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmpeqpd, IMM8 = 0))] // TODO Validate vcmppd +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { - macro_rules! 
call { - ($imm8:expr) => { - vcmppd256(a, b, $imm8) - }; - } - constify_imm6!(imm8, call) +pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d { + static_assert_imm5!(IMM8); + vcmppd256(a, b, IMM8 as u8) } /// Compares packed single-precision (32-bit) floating-point @@ -3640,7 +3636,7 @@ mod tests { unsafe fn test_mm256_cmp_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); - let r = _mm256_cmp_pd(a, b, _CMP_GE_OS); + let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b); let e = _mm256_set1_pd(0.); assert_eq_m256d(r, e); } diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs index c1f974133c..d08052df32 100644 --- a/crates/core_arch/src/x86/test.rs +++ b/crates/core_arch/src/x86/test.rs @@ -47,7 +47,7 @@ pub unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { #[target_feature(enable = "avx")] pub unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) { - let cmp = _mm256_cmp_pd(a, b, _CMP_EQ_OQ); + let cmp = _mm256_cmp_pd::<_CMP_EQ_OQ>(a, b); if _mm256_movemask_pd(cmp) != 0b1111 { panic!("{:?} != {:?}", a, b); } From 5cb5d50889cac474bf50b8627cdea1d2ed660eb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 21:03:55 +0100 Subject: [PATCH 026/123] Convert `_mm_cmp_ps` to const generics and fix imm width --- crates/core_arch/src/x86/avx.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 0b520fc6f0..5fc097169b 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -822,16 +822,12 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ps) #[inline] #[target_feature(enable = "avx,sse")] -#[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps -#[rustc_args_required_const(2)] +#[cfg_attr(test, 
assert_instr(vcmpeqps, IMM8 = 0))] // TODO Validate vcmpps +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { - macro_rules! call { - ($imm8:expr) => { - vcmpps(a, b, $imm8) - }; - } - constify_imm6!(imm8, call) +pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 { + static_assert_imm5!(IMM8); + vcmpps(a, b, IMM8 as i8) } /// Compares packed single-precision (32-bit) floating-point @@ -3645,7 +3641,7 @@ mod tests { unsafe fn test_mm_cmp_ps() { let a = _mm_setr_ps(4., 3., 2., 5.); let b = _mm_setr_ps(4., 9., 16., 25.); - let r = _mm_cmp_ps(a, b, _CMP_GE_OS); + let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b); assert!(get_m128(r, 0).is_nan()); assert_eq!(get_m128(r, 1), 0.); assert_eq!(get_m128(r, 2), 0.); From 784e22e6649677274a4d4873f4453c6e8b2d53a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 21:06:57 +0100 Subject: [PATCH 027/123] Convert `_mm256_cmp_ps` to const generics and fix imm width --- crates/core_arch/src/x86/avx.rs | 16 ++++++---------- crates/core_arch/src/x86/test.rs | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 5fc097169b..a690bc5ad6 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -837,16 +837,12 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ps) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps -#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmpeqps, IMM8 = 0))] // TODO Validate vcmpps +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { - macro_rules! 
call { - ($imm8:expr) => { - vcmpps256(a, b, $imm8) - }; - } - constify_imm6!(imm8, call) +pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 { + static_assert_imm5!(IMM8); + vcmpps256(a, b, IMM8 as u8) } /// Compares the lower double-precision (64-bit) floating-point element in @@ -3652,7 +3648,7 @@ mod tests { unsafe fn test_mm256_cmp_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); - let r = _mm256_cmp_ps(a, b, _CMP_GE_OS); + let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b); let e = _mm256_set1_ps(0.); assert_eq_m256(r, e); } diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs index d08052df32..0784e37524 100644 --- a/crates/core_arch/src/x86/test.rs +++ b/crates/core_arch/src/x86/test.rs @@ -60,7 +60,7 @@ pub unsafe fn get_m256d(a: __m256d, idx: usize) -> f64 { #[target_feature(enable = "avx")] pub unsafe fn assert_eq_m256(a: __m256, b: __m256) { - let cmp = _mm256_cmp_ps(a, b, _CMP_EQ_OQ); + let cmp = _mm256_cmp_ps::<_CMP_EQ_OQ>(a, b); if _mm256_movemask_ps(cmp) != 0b11111111 { panic!("{:?} != {:?}", a, b); } From 06c707ff448a2612aad3823b8ee1b0b5c6815db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 21:09:16 +0100 Subject: [PATCH 028/123] Convert `_mm_cmp_sd` to const generics and fix imm width --- crates/core_arch/src/x86/avx.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index a690bc5ad6..8bfd907857 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -854,16 +854,12 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd) #[inline] #[target_feature(enable = "avx,sse2")] -#[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd 
-#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmpeqsd, IMM8 = 0))] // TODO Validate vcmpsd +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { - macro_rules! call { - ($imm8:expr) => { - vcmpsd(a, b, $imm8) - }; - } - constify_imm6!(imm8, call) +pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_imm5!(IMM8); + vcmpsd(a, b, IMM8 as i8) } /// Compares the lower single-precision (32-bit) floating-point element in @@ -3657,7 +3653,7 @@ mod tests { unsafe fn test_mm_cmp_sd() { let a = _mm_setr_pd(4., 9.); let b = _mm_setr_pd(4., 3.); - let r = _mm_cmp_sd(a, b, _CMP_GE_OS); + let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b); assert!(get_m128d(r, 0).is_nan()); assert_eq!(get_m128d(r, 1), 9.); } From 42643cb5f54cbf2f5d8713a3c1f1dd24dbbac919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 21:13:40 +0100 Subject: [PATCH 029/123] Convert `_mm_cmp_ss` to const generics and fix imm width --- crates/core_arch/src/x86/avx.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 8bfd907857..98ba11fefa 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -871,16 +871,12 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss) #[inline] #[target_feature(enable = "avx,sse")] -#[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss -#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmpeqss, IMM8 = 0))] // TODO Validate vcmpss +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 { - macro_rules! 
call { - ($imm8:expr) => { - vcmpss(a, b, $imm8) - }; - } - constify_imm6!(imm8, call) +pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128) -> __m128 { + static_assert_imm5!(IMM8); + vcmpss(a, b, IMM8 as i8) } /// Converts packed 32-bit integers in `a` to packed double-precision (64-bit) @@ -3662,7 +3658,7 @@ mod tests { unsafe fn test_mm_cmp_ss() { let a = _mm_setr_ps(4., 3., 2., 5.); let b = _mm_setr_ps(4., 9., 16., 25.); - let r = _mm_cmp_ss(a, b, _CMP_GE_OS); + let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b); assert!(get_m128(r, 0).is_nan()); assert_eq!(get_m128(r, 1), 3.); assert_eq!(get_m128(r, 2), 2.); From 90fa961616a889ca704da2635fa244da4d2ab052 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 21:23:57 +0100 Subject: [PATCH 030/123] Convert `_mm256_insert_epi8` to const generics --- crates/core_arch/src/x86/avx.rs | 15 +++++---------- crates/core_arch/src/x86/avx2.rs | 10 +++++----- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 98ba11fefa..28e14fb758 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -1470,16 +1470,11 @@ pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m2 #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. -#[rustc_args_required_const(2)] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i { - let a = a.as_i8x32(); - macro_rules! 
call { - ($index:expr) => { - simd_insert(a, $index, i) - }; - } - transmute(constify_imm5!(index, call)) +pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8) -> __m256i { + static_assert_imm5!(INDEX); + transmute(simd_insert(a.as_i8x32(), INDEX as u32, i)) } /// Copies `a` to result, and inserts the 16-bit integer `i` into result @@ -3931,7 +3926,7 @@ mod tests { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); - let r = _mm256_insert_epi8(a, 0, 31); + let r = _mm256_insert_epi8::<31>(a, 0); #[rustfmt::skip] let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index ae15fc6db6..7fa1f1625e 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -4335,8 +4335,8 @@ mod tests { #[simd_test(enable = "avx2")] unsafe fn test_mm256_blendv_epi8() { let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2)); - let mask = _mm256_insert_epi8(_mm256_set1_epi8(0), -1, 2); - let e = _mm256_insert_epi8(_mm256_set1_epi8(4), 2, 2); + let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1); + let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2); let r = _mm256_blendv_epi8(a, b, mask); assert_eq_m256i(r, e); } @@ -4455,7 +4455,7 @@ mod tests { 7, 6, 5, 4, 3, 2, 1, 0, ); let r = _mm256_cmpeq_epi8(a, b); - assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2)); + assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0)); } #[simd_test(enable = "avx2")] @@ -4494,10 +4494,10 @@ mod tests { #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpgt_epi8() { - let a = _mm256_insert_epi8(_mm256_set1_epi8(0), 5, 0); + let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5); let b = _mm256_set1_epi8(0); let r = _mm256_cmpgt_epi8(a, b); - assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0)); + assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0)); } #[simd_test(enable = "avx2")] From 
965341701f8b8976f977b1d82cbdc76972704b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Mon, 1 Mar 2021 22:13:18 +0100 Subject: [PATCH 031/123] Convert `_mm256_extract_epi8` to const generics --- crates/core_arch/src/x86/avx2.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 7fa1f1625e..62c678bffa 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -3694,16 +3694,11 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { #[inline] #[target_feature(enable = "avx2")] // This intrinsic has no corresponding instruction. -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i32 { - let a = a.as_u8x32(); - macro_rules! call { - ($imm5:expr) => { - simd_extract::<_, u8>(a, $imm5) as i32 - }; - } - constify_imm5!(imm8, call) +pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { + static_assert_imm5!(IMM8); + simd_extract::<_, u8>(a.as_u8x32(), IMM8 as u32) as i32 } /// Extracts a 16-bit integer from `a`, selected with `imm8`. 
Returns a 32-bit @@ -6071,8 +6066,8 @@ mod tests { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ); - let r1 = _mm256_extract_epi8(a, 0); - let r2 = _mm256_extract_epi8(a, 35); + let r1 = _mm256_extract_epi8::<0>(a); + let r2 = _mm256_extract_epi8::<3>(a); assert_eq!(r1, 0xFF); assert_eq!(r2, 3); } From 7f47cf864fe645da26ffe7cb25950d6d55f2e3e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Tue, 2 Mar 2021 19:39:06 +0100 Subject: [PATCH 032/123] remove unused x86 macros --- crates/core_arch/src/x86/macros.rs | 65 ------------------------------ 1 file changed, 65 deletions(-) diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index ecb7085d18..bf734974af 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -48,71 +48,6 @@ macro_rules! static_assert_imm8u { }; } -macro_rules! constify_imm6 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1_1111 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - 3 => $expand!(3), - 4 => $expand!(4), - 5 => $expand!(5), - 6 => $expand!(6), - 7 => $expand!(7), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - 12 => $expand!(12), - 13 => $expand!(13), - 14 => $expand!(14), - 15 => $expand!(15), - 16 => $expand!(16), - 17 => $expand!(17), - 18 => $expand!(18), - 19 => $expand!(19), - 20 => $expand!(20), - 21 => $expand!(21), - 22 => $expand!(22), - 23 => $expand!(23), - 24 => $expand!(24), - 25 => $expand!(25), - 26 => $expand!(26), - 27 => $expand!(27), - 28 => $expand!(28), - 29 => $expand!(29), - 30 => $expand!(30), - _ => $expand!(31), - } - }; -} - -#[allow(unused_macros)] -macro_rules! 
constify_imm4 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - 3 => $expand!(3), - 4 => $expand!(4), - 5 => $expand!(5), - 6 => $expand!(6), - 7 => $expand!(7), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - 12 => $expand!(12), - 13 => $expand!(13), - 14 => $expand!(14), - _ => $expand!(15), - } - }; -} - macro_rules! constify_imm3 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] From 5653eed489138795a424d0028fa7720960b5bb8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Wed, 3 Mar 2021 02:38:26 +0100 Subject: [PATCH 033/123] make some const generic immediates better match their width or the intel intrinsics guide --- crates/core_arch/src/x86/avx.rs | 60 ++++++++++++++++---------------- crates/core_arch/src/x86/avx2.rs | 24 ++++++------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 28e14fb758..53c4a00f42 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -787,66 +787,66 @@ pub const _CMP_TRUE_US: i32 = 0x1f; /// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand -/// specified by `imm8`. +/// specified by `IMM5`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_pd) #[inline] #[target_feature(enable = "avx,sse2")] -#[cfg_attr(test, assert_instr(vcmpeqpd, IMM8 = 0))] // TODO Validate vcmppd +#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d { - static_assert_imm5!(IMM8); - vcmppd(a, b, IMM8 as i8) +pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d { + static_assert_imm5!(IMM5); + vcmppd(a, b, IMM5 as i8) } /// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand -/// specified by `imm8`. +/// specified by `IMM5`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vcmpeqpd, IMM8 = 0))] // TODO Validate vcmppd +#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d { - static_assert_imm5!(IMM8); - vcmppd256(a, b, IMM8 as u8) +pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d { + static_assert_imm5!(IMM5); + vcmppd256(a, b, IMM5 as u8) } /// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand -/// specified by `imm8`. +/// specified by `IMM5`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ps) #[inline] #[target_feature(enable = "avx,sse")] -#[cfg_attr(test, assert_instr(vcmpeqps, IMM8 = 0))] // TODO Validate vcmpps +#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 { - static_assert_imm5!(IMM8); - vcmpps(a, b, IMM8 as i8) +pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 { + static_assert_imm5!(IMM5); + vcmpps(a, b, IMM5 as i8) } /// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand -/// specified by `imm8`. +/// specified by `IMM5`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ps) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vcmpeqps, IMM8 = 0))] // TODO Validate vcmpps +#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 { - static_assert_imm5!(IMM8); - vcmpps256(a, b, IMM8 as u8) +pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 { + static_assert_imm5!(IMM5); + vcmpps256(a, b, IMM5 as u8) } /// Compares the lower double-precision (64-bit) floating-point element in -/// `a` and `b` based on the comparison operand specified by `imm8`, +/// `a` and `b` based on the comparison operand specified by `IMM5`, /// store the result in the lower element of returned vector, /// and copies the upper element from `a` to the upper element of returned /// vector. 
@@ -854,16 +854,16 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd) #[inline] #[target_feature(enable = "avx,sse2")] -#[cfg_attr(test, assert_instr(vcmpeqsd, IMM8 = 0))] // TODO Validate vcmpsd +#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_imm5!(IMM8); - vcmpsd(a, b, IMM8 as i8) +pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d { + static_assert_imm5!(IMM5); + vcmpsd(a, b, IMM5 as i8) } /// Compares the lower single-precision (32-bit) floating-point element in -/// `a` and `b` based on the comparison operand specified by `imm8`, +/// `a` and `b` based on the comparison operand specified by `IMM5`, /// store the result in the lower element of returned vector, /// and copies the upper 3 packed elements from `a` to the upper elements of /// returned vector. 
@@ -871,12 +871,12 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss) #[inline] #[target_feature(enable = "avx,sse")] -#[cfg_attr(test, assert_instr(vcmpeqss, IMM8 = 0))] // TODO Validate vcmpss +#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128) -> __m128 { - static_assert_imm5!(IMM8); - vcmpss(a, b, IMM8 as i8) +pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128) -> __m128 { + static_assert_imm5!(IMM5); + vcmpss(a, b, IMM5 as i8) } /// Converts packed 32-bit integers in `a` to packed double-precision (64-bit) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 62c678bffa..c98c1d8005 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -3685,7 +3685,7 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } -/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit +/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468](https://reviews.llvm.org/D20468). @@ -3696,12 +3696,12 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { - static_assert_imm5!(IMM8); - simd_extract::<_, u8>(a.as_u8x32(), IMM8 as u32) as i32 +pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { + static_assert_imm5!(INDEX); + simd_extract::<_, u8>(a.as_u8x32(), INDEX as u32) as i32 } -/// Extracts a 16-bit integer from `a`, selected with `imm8`. 
Returns a 32-bit +/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468](https://reviews.llvm.org/D20468). @@ -3712,12 +3712,12 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi16(a: __m256i) -> i32 { - static_assert_imm4!(IMM8); - simd_extract::<_, u16>(a.as_u16x16(), IMM8 as u32) as i32 +pub unsafe fn _mm256_extract_epi16(a: __m256i) -> i32 { + static_assert_imm4!(INDEX); + simd_extract::<_, u16>(a.as_u16x16(), INDEX as u32) as i32 } -/// Extracts a 32-bit integer from `a`, selected with `imm8`. +/// Extracts a 32-bit integer from `a`, selected with `INDEX`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32) #[inline] @@ -3725,9 +3725,9 @@ pub unsafe fn _mm256_extract_epi16(a: __m256i) -> i32 { // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi32(a: __m256i) -> i32 { - static_assert_imm3!(IMM8); - simd_extract(a.as_i32x8(), IMM8 as u32) +pub unsafe fn _mm256_extract_epi32(a: __m256i) -> i32 { + static_assert_imm3!(INDEX); + simd_extract(a.as_i32x8(), INDEX as u32) } /// Returns the first element of the input vector of `[4 x double]`. 
From 4ec00defa601d8dac651a8b13b93da8013c76405 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 21:01:18 +0000 Subject: [PATCH 034/123] fix macro --- crates/core_arch/src/x86/macros.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index bf734974af..4f160221d6 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -32,22 +32,6 @@ macro_rules! static_assert_sae { }; } -// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is -// out of `bits`-bit range. -pub(crate) struct ValidateConstImmU; -impl ValidateConstImmU { - pub(crate) const VALID: () = { - let _ = 1 / ((IMM < (1 << BITS)) as usize); - }; -} - -#[allow(unused)] -macro_rules! static_assert_imm8u { - ($imm:ident) => { - let _ = $crate::core_arch::x86::macros::ValidateConstImmU::<$imm, 8>::VALID; - }; -} - macro_rules! constify_imm3 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] From 01fb6247a16b92c445710730ffc38bbe5df05e8a Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 23:09:01 +0000 Subject: [PATCH 035/123] shuffle_i32x4 --- crates/core_arch/src/x86/avx512f.rs | 223 +++++++++------------------- 1 file changed, 72 insertions(+), 151 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 8f3c80e113..cdc3f2d003 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21909,78 +21909,34 @@ pub unsafe fn _mm_maskz_shuffle_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i32&expand=5177) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10010101))] //should be vshufi32x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - 
assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10010101))] //should be vshufi32x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(MASK); let a = a.as_i32x16(); let b = b.as_i32x16(); - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr, - $i:expr, - $j:expr, - $k:expr, - $l:expr, - $m:expr, - $n:expr, - $o:expr, - $p:expr - ) => { - simd_shuffle16( - a, - b, - [ - $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, - ], - ) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27), - _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19), - 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23), - 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27), - _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr, $i: expr, $m: expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3), - 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7), - 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11), - _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15), - } - }; - } - let r: i32x16 = match imm8 & 0x3 { - 0 => shuffle1!(0, 1, 2, 3), - 1 => shuffle1!(4, 5, 6, 7), - 2 => shuffle1!(8, 9, 10, 11), - _ => shuffle1!(12, 13, 14, 15), - }; - + let r: i32x16 = simd_shuffle16( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, + ], + ); transmute(r) } @@ -21989,21 +21945,15 @@ pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i32x&expand=5175) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10110101))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_i32x4( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i32x4::(a, b); transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) } @@ -22012,20 +21962,14 @@ pub unsafe fn _mm512_mask_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i32&expand=5176) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10110101))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_i32x4( k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i32x4::(a, b); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) } @@ -22035,39 +21979,26 @@ pub unsafe fn _mm512_maskz_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_i32x4&expand=5174) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshufi32x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b1001))] //should be vshufi32x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(MASK); let a = a.as_i32x8(); let b = b.as_i32x8(); - macro_rules! 
shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! shuffle1 { - ($a:expr, $b:expr, $c: expr, $d: expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, $c, $d, 8, 9, 10, 11), - _ => shuffle2!($a, $b, $c, $d, 12, 13, 14, 15), - } - }; - } - let r: i32x8 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1, 2, 3), - _ => shuffle1!(4, 5, 6, 7), - }; + let r: i32x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); transmute(r) } @@ -22076,21 +22007,16 @@ pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i, imm8: i32) -> __m256i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_i32x4&expand=5172) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_i32x4( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_i32x4::(a, b); transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) } @@ -22099,20 +22025,15 @@ pub unsafe fn _mm256_mask_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_i32x4&expand=5173) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_i32x4( +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_i32x4( k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_i32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_i32x4::(a, b); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) } @@ -47798,7 +47719,7 @@ mod tests { unsafe fn test_mm512_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_shuffle_i32x4(a, b, 0b0000); + let r = _mm512_shuffle_i32x4::<0b0000>(a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47807,9 +47728,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_mask_shuffle_i32x4(a, 0, a, b, 0b0000); + let r = _mm512_mask_shuffle_i32x4::<0b0000>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shuffle_i32x4(a, 
0b11111111_11111111, a, b, 0b0000); + let r = _mm512_mask_shuffle_i32x4::<0b0000>(a, 0b11111111_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47818,9 +47739,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_maskz_shuffle_i32x4(0, a, b, 0b0000); + let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_i32x4(0b00000000_11111111, a, b, 0b0000); + let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0b00000000_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -47829,7 +47750,7 @@ mod tests { unsafe fn test_mm256_shuffle_i32x4() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm256_shuffle_i32x4(a, b, 0b00); + let r = _mm256_shuffle_i32x4::<0b00>(a, b); let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); assert_eq_m256i(r, e); } @@ -47838,9 +47759,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_i32x4() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm256_mask_shuffle_i32x4(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shuffle_i32x4(a, 0b11111111, a, b, 0b00); + let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b); let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); assert_eq_m256i(r, e); } @@ -47849,9 +47770,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_i32x4() { let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm256_maskz_shuffle_i32x4(0, a, b, 0b00); + let r 
= _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shuffle_i32x4(0b11111111, a, b, 0b00); + let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b); let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16); assert_eq_m256i(r, e); } From c762fec5f01d09803c0d927856f5e3a1fb2c6133 Mon Sep 17 00:00:00 2001 From: jironglin Date: Wed, 3 Mar 2021 23:44:49 +0000 Subject: [PATCH 036/123] shuffle_f32x4 --- crates/core_arch/src/x86/avx512f.rs | 254 ++++++++++------------------ 1 file changed, 94 insertions(+), 160 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index cdc3f2d003..b5d49b8677 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21909,7 +21909,7 @@ pub unsafe fn _mm_maskz_shuffle_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i32&expand=5177) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10010101))] //should be vshufi32x4 +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4 #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i { static_assert_imm8!(MASK); @@ -21945,7 +21945,7 @@ pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i32x&expand=5175) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm512_mask_shuffle_i32x4( src: __m512i, @@ -21962,7 +21962,7 @@ pub unsafe fn _mm512_mask_shuffle_i32x4( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i32&expand=5176) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10110101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm512_maskz_shuffle_i32x4( k: __mmask16, @@ -21979,7 +21979,7 @@ pub unsafe fn _mm512_maskz_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_i32x4&expand=5174) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, MASK = 0b1001))] //should be vshufi32x4 +#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4 #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> __m256i { static_assert_imm8!(MASK); @@ -22007,7 +22007,7 @@ pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_i32x4&expand=5172) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm256_mask_shuffle_i32x4( src: __m256i, @@ -22025,7 +22025,7 @@ pub unsafe fn _mm256_mask_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_i32x4&expand=5173) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b1101))] +#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm256_maskz_shuffle_i32x4( k: __mmask8, @@ -22234,75 +22234,35 @@ pub unsafe fn _mm256_maskz_shuffle_i64x2( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_f32x4&expand=5165) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] //should be vshuff32x4, but generate vshuff64x2 -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr, - $i:expr, - $j:expr, - $k:expr, - $l:expr, - $m:expr, - $n:expr, - $o:expr, - $p:expr - ) => { - simd_shuffle16( - a, - b, - [ - $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, - ], - ) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27), - _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19), - 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23), - 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27), - _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr, $i: expr, $m: expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3), - 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7), - 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11), - _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 1, 2, 3), - 1 => shuffle1!(4, 5, 6, 7), - 2 => shuffle1!(8, 9, 10, 11), - _ => shuffle1!(12, 13, 14, 15), - } +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m512 { + static_assert_imm8!(MASK); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r: f32x16 = simd_shuffle16( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, + ], + ); + transmute(r) } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -22310,21 +22270,16 @@ pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_f32&expand=5163) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_f32x4( +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_f32x4( src: __m512, k: __mmask16, a: __m512, b: __m512, - imm8: i32, ) -> __m512 { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f32x4::(a, b); transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) } @@ -22333,15 +22288,15 @@ pub unsafe fn _mm512_mask_shuffle_f32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_f32&expand=5164) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_f32x4( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f32x4::(a, b); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) } @@ -22351,40 +22306,26 @@ pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_f32x4&expand=5162) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshuff32x4 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256, imm8: i32) -> __m256 { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m256 { + static_assert_imm8!(MASK); let a = a.as_f32x8(); let b = b.as_f32x8(); - macro_rules! shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $b:expr, $c: expr, $d: expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, $c, $d, 8, 9, 10, 11), - _ => shuffle2!($a, $b, $c, $d, 12, 13, 14, 15), - } - }; - } - let r: f32x8 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1, 2, 3), - _ => shuffle1!(4, 5, 6, 7), - }; - + let r: f32x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); transmute(r) } @@ -22393,21 +22334,15 @@ pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256, imm8: i32) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_f32x4&expand=5160) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_f32x4( +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_f32x4( src: __m256, k: __mmask8, a: __m256, b: __m256, - imm8: i32, ) -> __m256 { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_f32x4::(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -22416,15 +22351,14 @@ pub unsafe fn _mm256_mask_shuffle_f32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_f32x4&expand=5161) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_f32x4(k: __mmask8, a: __m256, b: __m256, imm8: i32) -> __m256 { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_f32x4(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_f32x4( + k: __mmask8, + a: __m256, + b: __m256, +) -> __m256 { + let r = _mm256_shuffle_f32x4::(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) } @@ -47719,7 +47653,7 @@ mod tests { unsafe fn test_mm512_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_shuffle_i32x4::<0b0000>(a, b); + let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47728,9 +47662,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_mask_shuffle_i32x4::<0b0000>(a, 0, a, b); + let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512i(r, a); - let r = 
_mm512_mask_shuffle_i32x4::<0b0000>(a, 0b11111111_11111111, a, b); + let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7); assert_eq_m512i(r, e); } @@ -47739,9 +47673,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_i32x4() { let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0, a, b); + let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_i32x4::<0b0000>(0b00000000_11111111, a, b); + let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b); let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -47785,7 +47719,7 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_shuffle_f32x4(a, b, 0b00000000); + let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b); let e = _mm512_setr_ps( 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7., ); @@ -47800,9 +47734,9 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_mask_shuffle_f32x4(a, 0, a, b, 0b00000000); + let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512(r, a); - let r = _mm512_mask_shuffle_f32x4(a, 0b11111111_11111111, a, b, 0b00000000); + let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b); let e = _mm512_setr_ps( 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7., ); @@ -47817,9 +47751,9 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_maskz_shuffle_f32x4(0, a, b, 0b00000000); + let r = 
_mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_shuffle_f32x4(0b00000000_11111111, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b); let e = _mm512_setr_ps( 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0., ); @@ -47830,7 +47764,7 @@ mod tests { unsafe fn test_mm256_shuffle_f32x4() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_shuffle_f32x4(a, b, 0b00); + let r = _mm256_shuffle_f32x4::<0b00>(a, b); let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); assert_eq_m256(r, e); } @@ -47839,9 +47773,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_f32x4() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_mask_shuffle_f32x4(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b); assert_eq_m256(r, a); - let r = _mm256_mask_shuffle_f32x4(a, 0b11111111, a, b, 0b00); + let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b); let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); assert_eq_m256(r, e); } @@ -47850,9 +47784,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_f32x4() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_maskz_shuffle_f32x4(0, a, b, 0b00); + let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b); assert_eq_m256(r, _mm256_setzero_ps()); - let r = _mm256_maskz_shuffle_f32x4(0b11111111, a, b, 0b00); + let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b); let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.); assert_eq_m256(r, e); } From e4715647647f1c02ecf77557e62fb6195bd2df7c Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 00:18:12 +0000 Subject: [PATCH 037/123] shuffle_i64x2 --- 
crates/core_arch/src/x86/avx512f.rs | 173 ++++++++----------------- crates/core_arch/src/x86_64/avx512f.rs | 20 +-- 2 files changed, 61 insertions(+), 132 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index b5d49b8677..c00dbaea21 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -22043,61 +22043,27 @@ pub unsafe fn _mm256_maskz_shuffle_i32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i64x2&expand=5183) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13), - _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, 8, 9), - 1 => shuffle3!($a, $b, $e, $f, 10, 11), - 2 => shuffle3!($a, $b, $e, $f, 12, 13), - _ => shuffle3!($a, $b, $e, $f, 14, 15), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, 0, 1), - 1 => shuffle2!($a, $e, 2, 3), - 2 => shuffle2!($a, $e, 4, 5), - _ => shuffle2!($a, $e, 6, 7), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 1), - 1 => shuffle1!(2, 3), - 2 => shuffle1!(4, 5), - _ => shuffle1!(6, 7), - } +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(MASK); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r: i64x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22105,21 +22071,15 @@ pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i64x&expand=5181) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_i64x2( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) } @@ -22128,20 +22088,14 @@ pub unsafe fn _mm512_mask_shuffle_i64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i64&expand=5182) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_i64x2( k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_i64x2::(a, b); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) } @@ -22151,35 +22105,22 @@ pub unsafe fn _mm512_maskz_shuffle_i64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_i64x2&expand=5180) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshufi64x2 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(MASK); let a = a.as_i64x4(); let b = b.as_i64x4(); - macro_rules! 
shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr - ) => { - simd_shuffle4(a, b, [$a, $b, $c, $d]) - }; - } - macro_rules! shuffle1 { - ($a:expr, $b:expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, 4, 5), - _ => shuffle2!($a, $b, 6, 7), - } - }; - } - let r: i64x4 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1), - _ => shuffle1!(2, 3), - }; + let r: i64x4 = simd_shuffle4( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); transmute(r) } @@ -22188,21 +22129,15 @@ pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i, imm8: i32) -> __m256i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_i64x2&expand=5178) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_i64x2( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) } @@ -22211,20 +22146,14 @@ pub unsafe fn _mm256_mask_shuffle_i64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_i64x2&expand=5179) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_i64x2( +#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_i64x2( k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_i64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_i64x2::(a, b); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 2db8a430d4..6d816b86c3 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -9658,7 +9658,7 @@ mod tests { unsafe fn test_mm512_shuffle_i64x2() { let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_shuffle_i64x2(a, b, 0b00000000); + let r = _mm512_shuffle_i64x2::<0b00_00_00_00>(a, b); let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); assert_eq_m512i(r, e); } @@ -9667,9 +9667,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_i64x2() { let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_mask_shuffle_i64x2(a, 0, a, b, 0b00000000); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512i(r, 
a); - let r = _mm512_mask_shuffle_i64x2(a, 0b11111111, a, b, 0b00000000); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0b11111111, a, b); let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); assert_eq_m512i(r, e); } @@ -9678,9 +9678,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_i64x2() { let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); - let r = _mm512_maskz_shuffle_i64x2(0, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shuffle_i64x2(0b00001111, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0b00001111, a, b); let e = _mm512_setr_epi64(1, 4, 1, 4, 0, 0, 0, 0); assert_eq_m512i(r, e); } @@ -9689,7 +9689,7 @@ mod tests { unsafe fn test_mm256_shuffle_i64x2() { let a = _mm256_set_epi64x(1, 4, 5, 8); let b = _mm256_set_epi64x(2, 3, 6, 7); - let r = _mm256_shuffle_i64x2(a, b, 0b00); + let r = _mm256_shuffle_i64x2::<0b00>(a, b); let e = _mm256_set_epi64x(6, 7, 5, 8); assert_eq_m256i(r, e); } @@ -9698,9 +9698,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_i64x2() { let a = _mm256_set_epi64x(1, 4, 5, 8); let b = _mm256_set_epi64x(2, 3, 6, 7); - let r = _mm256_mask_shuffle_i64x2(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shuffle_i64x2(a, 0b00001111, a, b, 0b00); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0b00001111, a, b); let e = _mm256_set_epi64x(6, 7, 5, 8); assert_eq_m256i(r, e); } @@ -9709,9 +9709,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_i64x2() { let a = _mm256_set_epi64x(1, 4, 5, 8); let b = _mm256_set_epi64x(2, 3, 6, 7); - let r = _mm256_maskz_shuffle_i64x2(0, a, b, 0b00); + let r = _mm256_maskz_shuffle_i64x2::<0b00>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shuffle_i64x2(0b00001111, a, b, 0b00); + let r = 
_mm256_maskz_shuffle_i64x2::<0b00>(0b00001111, a, b); let e = _mm256_set_epi64x(6, 7, 5, 8); assert_eq_m256i(r, e); } From b93d2252a3d3106980105aa5d30615a754804d6f Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 00:41:22 +0000 Subject: [PATCH 038/123] shuffle_f64x2 --- crates/core_arch/src/x86/avx512f.rs | 197 ++++++++++--------------- crates/core_arch/src/x86_64/avx512f.rs | 20 +-- 2 files changed, 85 insertions(+), 132 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index c00dbaea21..5abe23e093 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -21692,6 +21692,7 @@ pub unsafe fn _mm512_mask_shuffle_ps( a: __m512, b: __m512, ) -> __m512 { + static_assert_imm8!(MASK); let r = _mm512_shuffle_ps::(a, b); transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) } @@ -21708,6 +21709,7 @@ pub unsafe fn _mm512_maskz_shuffle_ps( a: __m512, b: __m512, ) -> __m512 { + static_assert_imm8!(MASK); let r = _mm512_shuffle_ps::(a, b); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) @@ -21726,6 +21728,7 @@ pub unsafe fn _mm256_mask_shuffle_ps( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_ps::(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -21742,6 +21745,7 @@ pub unsafe fn _mm256_maskz_shuffle_ps( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_ps::(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) @@ -21760,6 +21764,7 @@ pub unsafe fn _mm_mask_shuffle_ps( a: __m128, b: __m128, ) -> __m128 { + static_assert_imm8!(MASK); let r = _mm_shuffle_ps::(a, b); transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) } @@ -21772,6 +21777,7 @@ pub unsafe fn _mm_mask_shuffle_ps( #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] 
#[rustc_legacy_const_generics(3)] pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + static_assert_imm8!(MASK); let r = _mm_shuffle_ps::(a, b); let zero = _mm_setzero_ps().as_f32x4(); transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) @@ -21815,6 +21821,7 @@ pub unsafe fn _mm512_mask_shuffle_pd( a: __m512d, b: __m512d, ) -> __m512d { + static_assert_imm8!(MASK); let r = _mm512_shuffle_pd::(a, b); transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) } @@ -21831,6 +21838,7 @@ pub unsafe fn _mm512_maskz_shuffle_pd( a: __m512d, b: __m512d, ) -> __m512d { + static_assert_imm8!(MASK); let r = _mm512_shuffle_pd::(a, b); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) @@ -21849,6 +21857,7 @@ pub unsafe fn _mm256_mask_shuffle_pd( a: __m256d, b: __m256d, ) -> __m256d { + static_assert_imm8!(MASK); let r = _mm256_shuffle_pd::(a, b); transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) } @@ -21865,6 +21874,7 @@ pub unsafe fn _mm256_maskz_shuffle_pd( a: __m256d, b: __m256d, ) -> __m256d { + static_assert_imm8!(MASK); let r = _mm256_shuffle_pd::(a, b); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) @@ -21883,6 +21893,7 @@ pub unsafe fn _mm_mask_shuffle_pd( a: __m128d, b: __m128d, ) -> __m128d { + static_assert_imm8!(MASK); let r = _mm_shuffle_pd::(a, b); transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) } @@ -21899,6 +21910,7 @@ pub unsafe fn _mm_maskz_shuffle_pd( a: __m128d, b: __m128d, ) -> __m128d { + static_assert_imm8!(MASK); let r = _mm_shuffle_pd::(a, b); let zero = _mm_setzero_pd().as_f64x2(); transmute(simd_select_bitmask(k, r.as_f64x2(), zero)) @@ -21953,6 +21965,7 @@ pub unsafe fn _mm512_mask_shuffle_i32x4( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i32x4::(a, b); transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) } @@ -21969,6 +21982,7 
@@ pub unsafe fn _mm512_maskz_shuffle_i32x4( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i32x4::(a, b); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) @@ -22079,6 +22093,7 @@ pub unsafe fn _mm512_mask_shuffle_i64x2( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) } @@ -22095,6 +22110,7 @@ pub unsafe fn _mm512_maskz_shuffle_i64x2( a: __m512i, b: __m512i, ) -> __m512i { + static_assert_imm8!(MASK); let r = _mm512_shuffle_i64x2::(a, b); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) @@ -22137,6 +22153,7 @@ pub unsafe fn _mm256_mask_shuffle_i64x2( a: __m256i, b: __m256i, ) -> __m256i { + static_assert_imm8!(MASK); let r = _mm256_shuffle_i64x2::(a, b); transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) } @@ -22153,6 +22170,7 @@ pub unsafe fn _mm256_maskz_shuffle_i64x2( a: __m256i, b: __m256i, ) -> __m256i { + static_assert_imm8!(MASK); let r = _mm256_shuffle_i64x2::(a, b); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) @@ -22271,6 +22289,7 @@ pub unsafe fn _mm256_mask_shuffle_f32x4( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_f32x4::(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -22287,6 +22306,7 @@ pub unsafe fn _mm256_maskz_shuffle_f32x4( a: __m256, b: __m256, ) -> __m256 { + static_assert_imm8!(MASK); let r = _mm256_shuffle_f32x4::(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) @@ -22297,61 +22317,27 @@ pub unsafe fn _mm256_maskz_shuffle_f32x4( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_f64x2&expand=5171) #[inline] 
#[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9), - 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11), - 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13), - _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, $e, $f, 8, 9), - 1 => shuffle3!($a, $b, $e, $f, 10, 11), - 2 => shuffle3!($a, $b, $e, $f, 12, 13), - _ => shuffle3!($a, $b, $e, $f, 14, 15), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, $e, 0, 1), - 1 => shuffle2!($a, $e, 2, 3), - 2 => shuffle2!($a, $e, 4, 5), - _ => shuffle2!($a, $e, 6, 7), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 1), - 1 => shuffle1!(2, 3), - 2 => shuffle1!(4, 5), - _ => shuffle1!(6, 7), - } +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> __m512d { + static_assert_imm8!(MASK); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r: f64x8 = simd_shuffle8( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22359,21 +22345,16 @@ pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_f64x2&expand=5169) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_f64x2( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, - imm8: i32, ) -> __m512d { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f64x2::(a, b); transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) } @@ -22382,20 +22363,15 @@ pub unsafe fn _mm512_mask_shuffle_f64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_f64x2&expand=5170) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_f64x2( k: __mmask8, a: __m512d, b: __m512d, - imm8: i32, ) -> __m512d { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm512_shuffle_f64x2::(a, b); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) } @@ -22405,35 +22381,22 @@ pub unsafe fn _mm512_maskz_shuffle_f64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_f64x2&expand=5168) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] //should be vshuff64x2 -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d, imm8: i32) -> __m256d { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; +#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2 +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> __m256d { + static_assert_imm8!(MASK); let a = a.as_f64x4(); let b = b.as_f64x4(); - macro_rules! 
shuffle2 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr - ) => { - simd_shuffle4(a, b, [$a, $b, $c, $d]) - }; - } - macro_rules! shuffle1 { - ($a:expr, $b:expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, $b, 4, 5), - _ => shuffle2!($a, $b, 6, 7), - } - }; - } - let r: f64x4 = match imm8 & 0x1 { - 0 => shuffle1!(0, 1), - _ => shuffle1!(2, 3), - }; + let r: f64x4 = simd_shuffle4( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); transmute(r) } @@ -22442,21 +22405,16 @@ pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d, imm8: i32) -> __m256d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_f64x2&expand=5166) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b11))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_f64x2( src: __m256d, k: __mmask8, a: __m256d, b: __m256d, - imm8: i32, ) -> __m256d { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_f64x2::(a, b); transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) } @@ -22465,20 +22423,15 @@ pub unsafe fn _mm256_mask_shuffle_f64x2( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_f64x2&expand=5167) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b11))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_f64x2( +#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_f64x2( k: __mmask8, a: __m256d, b: __m256d, - imm8: i32, ) -> __m256d { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_f64x2(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(MASK); + let r = _mm256_shuffle_f64x2::(a, b); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 6d816b86c3..9ad35f7166 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -9720,7 +9720,7 @@ mod tests { unsafe fn test_mm512_shuffle_f64x2() { let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm512_shuffle_f64x2(a, b, 0b00000000); + let r = _mm512_shuffle_f64x2::<0b00_00_00_00>(a, b); let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); assert_eq_m512d(r, e); } @@ -9729,9 +9729,9 @@ mod tests { unsafe fn test_mm512_mask_shuffle_f64x2() { let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm512_mask_shuffle_f64x2(a, 0, a, b, 0b00000000); + 
let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0, a, b); assert_eq_m512d(r, a); - let r = _mm512_mask_shuffle_f64x2(a, 0b11111111, a, b, 0b00000000); + let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0b11111111, a, b); let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); assert_eq_m512d(r, e); } @@ -9740,9 +9740,9 @@ mod tests { unsafe fn test_mm512_maskz_shuffle_f64x2() { let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm512_maskz_shuffle_f64x2(0, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0, a, b); assert_eq_m512d(r, _mm512_setzero_pd()); - let r = _mm512_maskz_shuffle_f64x2(0b00001111, a, b, 0b00000000); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0b00001111, a, b); let e = _mm512_setr_pd(1., 4., 1., 4., 0., 0., 0., 0.); assert_eq_m512d(r, e); } @@ -9751,7 +9751,7 @@ mod tests { unsafe fn test_mm256_shuffle_f64x2() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_shuffle_f64x2(a, b, 0b00); + let r = _mm256_shuffle_f64x2::<0b00>(a, b); let e = _mm256_set_pd(6., 7., 5., 8.); assert_eq_m256d(r, e); } @@ -9760,9 +9760,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_f64x2() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_mask_shuffle_f64x2(a, 0, a, b, 0b00); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0, a, b); assert_eq_m256d(r, a); - let r = _mm256_mask_shuffle_f64x2(a, 0b00001111, a, b, 0b00); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0b00001111, a, b); let e = _mm256_set_pd(6., 7., 5., 8.); assert_eq_m256d(r, e); } @@ -9771,9 +9771,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_f64x2() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_maskz_shuffle_f64x2(0, a, b, 0b00); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0, a, b); assert_eq_m256d(r, _mm256_setzero_pd()); - let 
r = _mm256_maskz_shuffle_f64x2(0b00001111, a, b, 0b00); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0b00001111, a, b); let e = _mm256_set_pd(6., 7., 5., 8.); assert_eq_m256d(r, e); } From cc48c224c14895451a4af14bc028d212a45f3cdb Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 14:21:46 +0000 Subject: [PATCH 039/123] mm_cvtt_roundss,sd_u64,i64,si64; mm_cvt_roundss,sd_u64,i64,si64; mm_cvt_roundu64,i64,si64_ss,sd --- crates/core_arch/src/x86_64/avx512f.rs | 270 +++++++++---------------- 1 file changed, 90 insertions(+), 180 deletions(-) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 9ad35f7166..43906f7714 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -145,16 +145,11 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=1313) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2sd64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2sd64(a, b, ROUNDING); transmute(r) } @@ -169,16 +164,11 @@ pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=1367) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> __m128d { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsi2sd64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2sd64(a, b, ROUNDING); transmute(r) } @@ -193,16 +183,11 @@ pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=1314) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m128 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2ss64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) } @@ -217,16 +202,11 @@ pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=1379) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtusi2sd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64, rounding: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __m128d { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtusi2sd64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtusi2sd64(a, b, ROUNDING); transmute(r) } @@ -241,16 +221,11 @@ pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64, rounding: i32) -> __m128d /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=1368) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __m128 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsi2ss64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) } @@ -265,16 +240,11 @@ pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=1380) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64, rounding: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m128 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtusi2ss64(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtusi2ss64(a, b, ROUNDING); transmute(r) } @@ -289,16 +259,11 @@ pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64, rounding: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=1360) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si64(a, ROUNDING); transmute(r) } @@ -313,16 +278,11 @@ pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=1358) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2si64(a, ROUNDING); transmute(r) } @@ -337,16 +297,11 @@ pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=1365) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d, rounding: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2usi64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtsd2usi64(a, ROUNDING); transmute(r) } @@ -361,16 +316,11 @@ pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d, rounding: i32) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=1375) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_si64(a: __m128, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si64(a, ROUNDING); transmute(r) } @@ -385,16 +335,11 @@ pub unsafe fn _mm_cvt_roundss_si64(a: __m128, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=1370) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_i64(a: __m128, rounding: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2si64(a, ROUNDING); transmute(r) } @@ -409,16 +354,11 @@ pub unsafe fn _mm_cvt_roundss_i64(a: __m128, rounding: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=1377) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvt_roundss_u64(a: __m128, rounding: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2usi64(a, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vcvtss2usi64(a, ROUNDING); transmute(r) } @@ -428,16 +368,11 @@ pub unsafe fn _mm_cvt_roundss_u64(a: __m128, rounding: i32) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=1931) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si64(a, SAE); transmute(r) } @@ -447,16 +382,11 @@ pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=1929) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { let a = a.as_f64x2(); - macro_rules! call { - ($imm4:expr) => { - vcvtsd2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2si64(a, SAE); transmute(r) } @@ -466,16 +396,11 @@ pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=1933) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d, sae: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { let a = a.as_f64x2(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtsd2usi64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtsd2usi64(a, SAE); transmute(r) } @@ -485,16 +410,11 @@ pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d, sae: i32) -> u64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=1935) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_i64(a: __m128, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si64(a, SAE); transmute(r) } @@ -504,16 +424,11 @@ pub unsafe fn _mm_cvtt_roundss_i64(a: __m128, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=1937) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_si64(a: __m128, sae: i32) -> i64 { +#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { let a = a.as_f32x4(); - macro_rules! 
call { - ($imm4:expr) => { - vcvtss2si64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2si64(a, SAE); transmute(r) } @@ -523,16 +438,11 @@ pub unsafe fn _mm_cvtt_roundss_si64(a: __m128, sae: i32) -> i64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=1939) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_cvtt_roundss_u64(a: __m128, sae: i32) -> u64 { +#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { let a = a.as_f32x4(); - macro_rules! call { - ($imm4:expr) => { - vcvtss2usi64(a, $imm4) - }; - } - let r = constify_imm4_sae!(sae, call); + let r = vcvtss2usi64(a, SAE); transmute(r) } @@ -12197,7 +12107,7 @@ mod tests { unsafe fn test_mm_cvt_roundi64_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -12206,7 +12116,7 @@ mod tests { unsafe fn test_mm_cvt_roundsi64_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundsi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -12232,7 +12142,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_si64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12240,7 +12150,7 @@ mod tests { #[simd_test(enable = 
"avx512f")] unsafe fn test_mm_cvt_roundsd_i64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvt_roundsd_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12248,7 +12158,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundsd_u64() { let a = _mm_set_pd(1., f64::MAX); - let r = _mm_cvt_roundsd_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsd_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12264,7 +12174,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_i64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12272,7 +12182,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_si64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: i64 = -1; assert_eq!(r, e); } @@ -12280,7 +12190,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvt_roundss_u64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvt_roundss_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundss_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12304,7 +12214,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_i64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_i64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_i64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12312,7 +12222,7 @@ mod tests { #[simd_test(enable = "avx512f")] 
unsafe fn test_mm_cvtt_roundsd_si64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_si64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_si64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12320,7 +12230,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundsd_u64() { let a = _mm_set_pd(1., -1.5); - let r = _mm_cvtt_roundsd_u64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundsd_u64::<_MM_FROUND_CUR_DIRECTION>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12344,7 +12254,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_i64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_i64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_i64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12352,7 +12262,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_si64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_si64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_si64::<_MM_FROUND_CUR_DIRECTION>(a); let e: i64 = -2; assert_eq!(r, e); } @@ -12360,7 +12270,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm_cvtt_roundss_u64() { let a = _mm_set_ps(0., -0.5, 1., -1.5); - let r = _mm_cvtt_roundss_u64(a, _MM_FROUND_CUR_DIRECTION); + let r = _mm_cvtt_roundss_u64::<_MM_FROUND_CUR_DIRECTION>(a); let e: u64 = u64::MAX; assert_eq!(r, e); } @@ -12395,7 +12305,7 @@ mod tests { unsafe fn test_mm_cvt_roundu64_ss() { let a = _mm_set_ps(0., -0.5, 1., -1.5); let b: u64 = 9; - let r = _mm_cvt_roundu64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundu64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_ps(0., -0.5, 1., 9.); assert_eq_m128(r, e); } @@ -12404,7 +12314,7 @@ mod tests { unsafe fn test_mm_cvt_roundu64_sd() { let a = _mm_set_pd(1., -1.5); let b: u64 = 9; - let r = _mm_cvt_roundu64_sd(a, b, 
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundu64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 9.); assert_eq_m128d(r, e); } @@ -12413,7 +12323,7 @@ mod tests { unsafe fn test_mm_cvt_roundi64_sd() { let a = _mm_set_pd(1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 9.); assert_eq_m128d(r, e); } @@ -12422,7 +12332,7 @@ mod tests { unsafe fn test_mm_cvt_roundsi64_sd() { let a = _mm_set_pd(1., -1.5); let b: i64 = 9; - let r = _mm_cvt_roundsi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm_cvt_roundsi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm_set_pd(1., 9.); assert_eq_m128d(r, e); } From dc2774d7899b1d9720bd098bf082775ccfa24a87 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 14:41:58 +0000 Subject: [PATCH 040/123] add static_assert --- crates/core_arch/src/x86_64/avx512f.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 43906f7714..af62b2112c 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -148,6 +148,7 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { #[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsi2sd64(a, b, ROUNDING); transmute(r) @@ -167,6 +168,7 @@ pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __ #[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsi2sd64(a, b, 
ROUNDING); transmute(r) @@ -186,6 +188,7 @@ pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> _ #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) @@ -205,6 +208,7 @@ pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m #[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __m128d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtusi2sd64(a, b, ROUNDING); transmute(r) @@ -224,6 +228,7 @@ pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __ #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtsi2ss64(a, b, ROUNDING); transmute(r) @@ -243,6 +248,7 @@ pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __ #[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m128 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtusi2ss64(a, b, ROUNDING); transmute(r) @@ -262,6 +268,7 @@ pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsd2si64(a, ROUNDING); transmute(r) @@ -281,6 +288,7 @@ pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { + 
static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsd2si64(a, ROUNDING); transmute(r) @@ -300,6 +308,7 @@ pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { + static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let r = vcvtsd2usi64(a, ROUNDING); transmute(r) @@ -319,6 +328,7 @@ pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtss2si64(a, ROUNDING); transmute(r) @@ -338,6 +348,7 @@ pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtss2si64(a, ROUNDING); transmute(r) @@ -357,6 +368,7 @@ pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let r = vcvtss2usi64(a, ROUNDING); transmute(r) @@ -371,6 +383,7 @@ pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { #[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { + static_assert_sae!(SAE); let a = a.as_f64x2(); let r = vcvtsd2si64(a, SAE); transmute(r) @@ -385,6 +398,7 @@ pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { + static_assert_sae!(SAE); let a = a.as_f64x2(); let r = 
vcvtsd2si64(a, SAE); transmute(r) @@ -399,6 +413,7 @@ pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { #[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { + static_assert_sae!(SAE); let a = a.as_f64x2(); let r = vcvtsd2usi64(a, SAE); transmute(r) @@ -413,6 +428,7 @@ pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { #[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let r = vcvtss2si64(a, SAE); transmute(r) @@ -427,6 +443,7 @@ pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let r = vcvtss2si64(a, SAE); transmute(r) @@ -441,6 +458,7 @@ pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { #[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { + static_assert_sae!(SAE); let a = a.as_f32x4(); let r = vcvtss2usi64(a, SAE); transmute(r) From 06eb05c10c2ad77fb617f0b907124de75dd3540f Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 15:00:58 +0000 Subject: [PATCH 041/123] fix x86_64/macro --- crates/core_arch/src/x86_64/macros.rs | 47 ++++++++++++++------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/crates/core_arch/src/x86_64/macros.rs b/crates/core_arch/src/x86_64/macros.rs index e3682d40fe..cafa37dd6f 100644 --- a/crates/core_arch/src/x86_64/macros.rs +++ b/crates/core_arch/src/x86_64/macros.rs @@ -1,32 +1,33 @@ //! Utility macros. -// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11. -// This macro enforces that. 
+// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is +// not a round number. +pub(crate) struct ValidateConstRound; +impl ValidateConstRound { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11) as usize); + }; +} + #[allow(unused)] -macro_rules! constify_imm4_round { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - _ => panic!("Invalid round value"), - } +macro_rules! static_assert_rounding { + ($imm:ident) => { + let _ = $crate::core_arch::x86_64::macros::ValidateConstRound::<$imm>::VALID; + }; +} + +// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is +// not a sae number. +pub(crate) struct ValidateConstSae; +impl ValidateConstSae { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM == 4 || IMM == 8) as usize); }; } -// For sae instructions, the only valid values for sae are 4 and 8. -// This macro enforces that. #[allow(unused)] -macro_rules! constify_imm4_sae { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - _ => panic!("Invalid sae value"), - } +macro_rules! 
static_assert_sae { + ($imm:ident) => { + let _ = $crate::core_arch::x86_64::macros::ValidateConstSae::<$imm>::VALID; }; } From eacd3929832f0d9fa38c37ad1eaa0332faadc792 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 15:31:00 +0000 Subject: [PATCH 042/123] remove x86/macro imm4_sae,imm4_rounding --- crates/core_arch/src/x86/macros.rs | 31 ------------------------------ 1 file changed, 31 deletions(-) diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index 4f160221d6..47ceaeb20a 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -184,37 +184,6 @@ macro_rules! constify_imm8_gather { }; } -// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11. -// This macro enforces that. -#[allow(unused)] -macro_rules! constify_imm4_round { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - _ => panic!("Invalid round value"), - } - }; -} - -// For sae instructions, the only valid values for sae are 4 and 8. -// This macro enforces that. -#[allow(unused)] -macro_rules! constify_imm4_sae { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1111 { - 4 => $expand!(4), - 8 => $expand!(8), - _ => panic!("Invalid sae value"), - } - }; -} - // Two mantissas parameters. // This macro enforces that. 
#[allow(unused)] From 77a89e0088eb31361717d9e160f753b0dd64e8fd Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 4 Mar 2021 16:56:42 +0000 Subject: [PATCH 043/123] shldi,shrdi_epi64,epi32,epi16 --- crates/core_arch/src/x86/avx512vbmi2.rs | 865 +++++++++++++----------- 1 file changed, 468 insertions(+), 397 deletions(-) diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs index 032bce9176..b7a385dd97 100644 --- a/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/crates/core_arch/src/x86/avx512vbmi2.rs @@ -920,14 +920,15 @@ pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m1 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi64&expand=5060) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshldvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), )) } @@ -936,20 +937,20 @@ pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi64&expand=5058) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shldi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shldi_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { 
- assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshldvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -959,14 +960,19 @@ pub unsafe fn _mm512_mask_shldi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi64&expand=5059) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shldi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shldi_epi64( + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshldvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -977,14 +983,15 @@ pub unsafe fn _mm512_maskz_shldi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi64&expand=5057) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshldvq256( a.as_i64x4(), b.as_i64x4(), - 
_mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), )) } @@ -993,20 +1000,20 @@ pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi64&expand=5055) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shldi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shldi_epi64( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshldvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) } @@ -1016,14 +1023,19 @@ pub unsafe fn _mm256_mask_shldi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi64&expand=5056) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shldi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shldi_epi64( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshldvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1034,14 +1046,15 @@ pub 
unsafe fn _mm256_maskz_shldi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi64&expand=5054) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshldvq128( a.as_i64x2(), b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), + _mm_set1_epi64x(imm8).as_i64x2(), )) } @@ -1050,21 +1063,17 @@ pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi64&expand=5052) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shldi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shldi_epi64( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshldvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) } @@ -1073,15 +1082,16 @@ pub unsafe fn _mm_mask_shldi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi64&expand=5053) #[inline] 
#[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shldi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshldvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shldi_epi64( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshldvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); let zero = _mm_setzero_si128().as_i64x2(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1091,14 +1101,14 @@ pub unsafe fn _mm_maskz_shldi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi32&expand=5051) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); transmute(vpshldvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), )) } @@ -1107,20 +1117,19 @@ pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi32&expand=5049) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe 
fn _mm512_mask_shldi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shldi_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x16 = vpshldvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1130,14 +1139,18 @@ pub unsafe fn _mm512_mask_shldi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi32&expand=5050) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shldi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shldi_epi32( + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let shf: i32x16 = vpshldvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1148,14 +1161,14 @@ pub unsafe fn _mm512_maskz_shldi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi32&expand=5048) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] 
+#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); transmute(vpshldvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), )) } @@ -1164,20 +1177,19 @@ pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi32&expand=5046) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shldi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shldi_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x8 = vpshldvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) } @@ -1187,14 +1199,18 @@ pub unsafe fn _mm256_mask_shldi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi32&expand=5047) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shldi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shldi_epi32( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let shf: i32x8 = vpshldvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); let zero = 
_mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1205,14 +1221,14 @@ pub unsafe fn _mm256_maskz_shldi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi32&expand=5045) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); transmute(vpshldvd128( a.as_i32x4(), b.as_i32x4(), - _mm_set1_epi32(imm8).as_i32x4(), + _mm_set1_epi32(IMM8).as_i32x4(), )) } @@ -1221,17 +1237,16 @@ pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi32&expand=5043) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shldi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shldi_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) } @@ -1240,11 +1255,15 @@ pub unsafe fn _mm_mask_shldi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi32&expand=5044) 
#[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shldi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shldi_epi32( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1254,14 +1273,15 @@ pub unsafe fn _mm_maskz_shldi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi16&expand=5042) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshldvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), )) } @@ -1270,20 +1290,20 @@ pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi16&expand=5040) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn 
_mm512_mask_shldi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shldi_epi16( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x32 = vpshldvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -1293,14 +1313,19 @@ pub unsafe fn _mm512_mask_shldi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi16&expand=5041) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shldi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shldi_epi16( + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x32 = vpshldvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1311,14 +1336,15 @@ pub unsafe fn _mm512_maskz_shldi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi16&expand=5039) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, 
assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshldvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), )) } @@ -1327,20 +1353,20 @@ pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi16&expand=5037) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shldi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shldi_epi16( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x16 = vpshldvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) } @@ -1350,13 +1376,19 @@ pub unsafe fn _mm256_mask_shldi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi16&expand=5038) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shldi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shldi_epi16( + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x16 = vpshldvw256( a.as_i16x16(), 
b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1367,13 +1399,15 @@ pub unsafe fn _mm256_maskz_shldi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi16&expand=5036) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshldvw128( a.as_i16x8(), b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), + _mm_set1_epi16(imm8).as_i16x8(), )) } @@ -1382,20 +1416,17 @@ pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi16&expand=5034) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shldi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shldi_epi16( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - let shf: i16x8 = vpshldvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) } @@ -1404,14 +1435,16 @@ pub unsafe fn _mm_mask_shldi_epi16( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi16&expand=5035) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shldi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - let shf: i16x8 = vpshldvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shldi_epi16( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshldvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1421,14 +1454,15 @@ pub unsafe fn _mm_maskz_shldi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi64&expand=5114) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshrdvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), )) } @@ -1437,20 +1471,20 @@ pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi64&expand=5112) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shrdi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shrdi_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshrdvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1460,14 +1494,19 @@ pub unsafe fn _mm512_mask_shrdi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi64&expand=5113) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 255))] //should be vpshrdq -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shrdi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shrdi_epi64( + k: __mmask8, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x8 = vpshrdvq( a.as_i64x8(), b.as_i64x8(), - _mm512_set1_epi64(imm8 as i64).as_i64x8(), + _mm512_set1_epi64(imm8).as_i64x8(), ); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1478,14 +1517,15 @@ pub unsafe fn _mm512_maskz_shrdi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8 /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi64&expand=5111) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshrdvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), )) } @@ -1494,20 +1534,20 @@ pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi64&expand=5109) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shrdi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shrdi_epi64( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshrdvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) } @@ -1517,14 +1557,19 @@ pub unsafe fn _mm256_mask_shrdi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi64&expand=5110) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] 
-#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shrdi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shrdi_epi64( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; let shf: i64x4 = vpshrdvq256( a.as_i64x4(), b.as_i64x4(), - _mm256_set1_epi64x(imm8 as i64).as_i64x4(), + _mm256_set1_epi64x(imm8).as_i64x4(), ); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1535,14 +1580,15 @@ pub unsafe fn _mm256_maskz_shrdi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi64&expand=5108) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; transmute(vpshrdvq128( a.as_i64x2(), b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), + _mm_set1_epi64x(imm8).as_i64x2(), )) } @@ -1551,21 +1597,17 @@ pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi64&expand=5106) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq 
-#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shrdi_epi64( +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shrdi_epi64( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshrdvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) } @@ -1574,15 +1616,16 @@ pub unsafe fn _mm_mask_shrdi_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi64&expand=5107) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shrdi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i64x2 = vpshrdvq128( - a.as_i64x2(), - b.as_i64x2(), - _mm_set1_epi64x(imm8 as i64).as_i64x2(), - ); +#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shrdi_epi64( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i64; + let shf: i64x2 = vpshrdvq128(a.as_i64x2(), b.as_i64x2(), _mm_set1_epi64x(imm8).as_i64x2()); let zero = _mm_setzero_si128().as_i64x2(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1592,14 +1635,14 @@ pub unsafe fn _mm_maskz_shrdi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi32&expand=5105) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, 
assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); transmute(vpshrdvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), )) } @@ -1608,20 +1651,19 @@ pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi32&expand=5103) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shrdi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shrdi_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x16 = vpshrdvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1631,14 +1673,18 @@ pub unsafe fn _mm512_mask_shrdi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi32&expand=5104) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shrdi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, 
assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shrdi_epi32( + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let shf: i32x16 = vpshrdvd( a.as_i32x16(), b.as_i32x16(), - _mm512_set1_epi32(imm8).as_i32x16(), + _mm512_set1_epi32(IMM8).as_i32x16(), ); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1649,14 +1695,14 @@ pub unsafe fn _mm512_maskz_shrdi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi32&expand=5102) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); transmute(vpshrdvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), )) } @@ -1665,20 +1711,19 @@ pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi32&expand=5100) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shrdi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shrdi_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - assert!(imm8 >= 0 && 
imm8 <= 255); + static_assert_imm8!(IMM8); let shf: i32x8 = vpshrdvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) } @@ -1688,14 +1733,18 @@ pub unsafe fn _mm256_mask_shrdi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi32&expand=5101) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shrdi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shrdi_epi32( + k: __mmask8, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let shf: i32x8 = vpshrdvd256( a.as_i32x8(), b.as_i32x8(), - _mm256_set1_epi32(imm8).as_i32x8(), + _mm256_set1_epi32(IMM8).as_i32x8(), ); let zero = _mm256_setzero_si256().as_i32x8(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1706,14 +1755,14 @@ pub unsafe fn _mm256_maskz_shrdi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi32&expand=5099) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); transmute(vpshrdvd128( a.as_i32x4(), b.as_i32x4(), - 
_mm_set1_epi32(imm8).as_i32x4(), + _mm_set1_epi32(IMM8).as_i32x4(), )) } @@ -1722,17 +1771,16 @@ pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi32&expand=5097) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shrdi_epi32( +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shrdi_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) } @@ -1741,11 +1789,15 @@ pub unsafe fn _mm_mask_shrdi_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi32&expand=5098) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshldd -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shrdi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 255); - let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4()); +#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shrdi_epi32( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(IMM8).as_i32x4()); let zero = _mm_setzero_si128().as_i32x4(); 
transmute(simd_select_bitmask(k, shf, zero)) } @@ -1755,14 +1807,16 @@ pub unsafe fn _mm_maskz_shrdi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi16&expand=5096) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); transmute(vpshrdvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), )) } @@ -1771,20 +1825,21 @@ pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi16&expand=5094) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shrdi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shrdi_epi16( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); let shf: i16x32 = vpshrdvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -1794,14 +1849,20 @@ pub unsafe fn _mm512_mask_shrdi_epi16( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi16&expand=5095) #[inline] #[target_feature(enable = "avx512vbmi2")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shrdi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shrdi_epi16( + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); let shf: i16x32 = vpshrdvw( a.as_i16x32(), b.as_i16x32(), - _mm512_set1_epi16(imm8 as i16).as_i16x32(), + _mm512_set1_epi16(imm8).as_i16x32(), ); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1812,14 +1873,16 @@ pub unsafe fn _mm512_maskz_shrdi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi16&expand=5093) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); transmute(vpshrdvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), )) } @@ -1828,20 +1891,21 @@ pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi16&expand=5091) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shrdi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shrdi_epi16( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; assert!(imm8 >= 0 && imm8 <= 255); let shf: i16x16 = vpshrdvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) } @@ -1851,13 +1915,19 @@ pub unsafe fn _mm256_mask_shrdi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi16&expand=5092) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shrdi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shrdi_epi16( + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; let shf: i16x16 = vpshrdvw256( a.as_i16x16(), b.as_i16x16(), - _mm256_set1_epi16(imm8 as i16).as_i16x16(), + _mm256_set1_epi16(imm8).as_i16x16(), ); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shf, zero)) @@ -1868,13 +1938,15 @@ pub unsafe fn _mm256_maskz_shrdi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi16&expand=5090) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(2)] -pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; transmute(vpshrdvw128( a.as_i16x8(), b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), + _mm_set1_epi16(imm8).as_i16x8(), )) } @@ -1883,20 +1955,17 @@ pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi16&expand=5088) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shrdi_epi16( +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shrdi_epi16( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - let shf: i16x8 = vpshrdvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) } @@ -1905,14 +1974,16 @@ pub unsafe fn _mm_mask_shrdi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi16&expand=5089) #[inline] #[target_feature(enable = "avx512vbmi2,avx512vl")] -#[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] 
//should be vpshrdw -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shrdi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - let shf: i16x8 = vpshrdvw128( - a.as_i16x8(), - b.as_i16x8(), - _mm_set1_epi16(imm8 as i16).as_i16x8(), - ); +#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shrdi_epi16( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let imm8 = IMM8 as i16; + let shf: i16x8 = vpshrdvw128(a.as_i16x8(), b.as_i16x8(), _mm_set1_epi16(imm8).as_i16x8()); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -2921,7 +2992,7 @@ mod tests { unsafe fn test_mm512_shldi_epi64() { let a = _mm512_set1_epi64(1); let b = _mm512_set1_epi64(1 << 63); - let r = _mm512_shldi_epi64(a, b, 2); + let r = _mm512_shldi_epi64::<2>(a, b); let e = _mm512_set1_epi64(6); assert_eq_m512i(r, e); } @@ -2930,9 +3001,9 @@ mod tests { unsafe fn test_mm512_mask_shldi_epi64() { let a = _mm512_set1_epi64(1); let b = _mm512_set1_epi64(1 << 63); - let r = _mm512_mask_shldi_epi64(a, 0, a, b, 2); + let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shldi_epi64(a, 0b11111111, a, b, 2); + let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b); let e = _mm512_set1_epi64(6); assert_eq_m512i(r, e); } @@ -2941,9 +3012,9 @@ mod tests { unsafe fn test_mm512_maskz_shldi_epi64() { let a = _mm512_set1_epi64(1); let b = _mm512_set1_epi64(1 << 63); - let r = _mm512_maskz_shldi_epi64(0, a, b, 2); + let r = _mm512_maskz_shldi_epi64::<2>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shldi_epi64(0b11111111, a, b, 2); + let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b); let e = _mm512_set1_epi64(6); assert_eq_m512i(r, e); } @@ -2952,7 +3023,7 @@ mod tests { unsafe fn test_mm256_shldi_epi64() { let a = _mm256_set1_epi64x(1); let b = 
_mm256_set1_epi64x(1 << 63); - let r = _mm256_shldi_epi64(a, b, 2); + let r = _mm256_shldi_epi64::<2>(a, b); let e = _mm256_set1_epi64x(6); assert_eq_m256i(r, e); } @@ -2961,9 +3032,9 @@ mod tests { unsafe fn test_mm256_mask_shldi_epi64() { let a = _mm256_set1_epi64x(1); let b = _mm256_set1_epi64x(1 << 63); - let r = _mm256_mask_shldi_epi64(a, 0, a, b, 2); + let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shldi_epi64(a, 0b00001111, a, b, 2); + let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b); let e = _mm256_set1_epi64x(6); assert_eq_m256i(r, e); } @@ -2972,9 +3043,9 @@ mod tests { unsafe fn test_mm256_maskz_shldi_epi64() { let a = _mm256_set1_epi64x(1); let b = _mm256_set1_epi64x(1 << 63); - let r = _mm256_maskz_shldi_epi64(0, a, b, 2); + let r = _mm256_maskz_shldi_epi64::<2>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shldi_epi64(0b00001111, a, b, 2); + let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b); let e = _mm256_set1_epi64x(6); assert_eq_m256i(r, e); } @@ -2983,7 +3054,7 @@ mod tests { unsafe fn test_mm_shldi_epi64() { let a = _mm_set1_epi64x(1); let b = _mm_set1_epi64x(1 << 63); - let r = _mm_shldi_epi64(a, b, 2); + let r = _mm_shldi_epi64::<2>(a, b); let e = _mm_set1_epi64x(6); assert_eq_m128i(r, e); } @@ -2992,9 +3063,9 @@ mod tests { unsafe fn test_mm_mask_shldi_epi64() { let a = _mm_set1_epi64x(1); let b = _mm_set1_epi64x(1 << 63); - let r = _mm_mask_shldi_epi64(a, 0, a, b, 2); + let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shldi_epi64(a, 0b00000011, a, b, 2); + let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b); let e = _mm_set1_epi64x(6); assert_eq_m128i(r, e); } @@ -3003,9 +3074,9 @@ mod tests { unsafe fn test_mm_maskz_shldi_epi64() { let a = _mm_set1_epi64x(1); let b = _mm_set1_epi64x(1 << 63); - let r = _mm_maskz_shldi_epi64(0, a, b, 2); + let r = _mm_maskz_shldi_epi64::<2>(0, a, b); assert_eq_m128i(r, 
_mm_setzero_si128()); - let r = _mm_maskz_shldi_epi64(0b00000011, a, b, 2); + let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b); let e = _mm_set1_epi64x(6); assert_eq_m128i(r, e); } @@ -3014,7 +3085,7 @@ mod tests { unsafe fn test_mm512_shldi_epi32() { let a = _mm512_set1_epi32(1); let b = _mm512_set1_epi32(1 << 31); - let r = _mm512_shldi_epi32(a, b, 2); + let r = _mm512_shldi_epi32::<2>(a, b); let e = _mm512_set1_epi32(6); assert_eq_m512i(r, e); } @@ -3023,9 +3094,9 @@ mod tests { unsafe fn test_mm512_mask_shldi_epi32() { let a = _mm512_set1_epi32(1); let b = _mm512_set1_epi32(1 << 31); - let r = _mm512_mask_shldi_epi32(a, 0, a, b, 2); + let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shldi_epi32(a, 0b11111111_11111111, a, b, 2); + let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b); let e = _mm512_set1_epi32(6); assert_eq_m512i(r, e); } @@ -3034,9 +3105,9 @@ mod tests { unsafe fn test_mm512_maskz_shldi_epi32() { let a = _mm512_set1_epi32(1); let b = _mm512_set1_epi32(1 << 31); - let r = _mm512_maskz_shldi_epi32(0, a, b, 2); + let r = _mm512_maskz_shldi_epi32::<2>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shldi_epi32(0b11111111_11111111, a, b, 2); + let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b); let e = _mm512_set1_epi32(6); assert_eq_m512i(r, e); } @@ -3045,7 +3116,7 @@ mod tests { unsafe fn test_mm256_shldi_epi32() { let a = _mm256_set1_epi32(1); let b = _mm256_set1_epi32(1 << 31); - let r = _mm256_shldi_epi32(a, b, 2); + let r = _mm256_shldi_epi32::<2>(a, b); let e = _mm256_set1_epi32(6); assert_eq_m256i(r, e); } @@ -3054,9 +3125,9 @@ mod tests { unsafe fn test_mm256_mask_shldi_epi32() { let a = _mm256_set1_epi32(1); let b = _mm256_set1_epi32(1 << 31); - let r = _mm256_mask_shldi_epi32(a, 0, a, b, 2); + let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shldi_epi32(a, 0b11111111, a, b, 2); + let r = 
_mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b); let e = _mm256_set1_epi32(6); assert_eq_m256i(r, e); } @@ -3065,9 +3136,9 @@ mod tests { unsafe fn test_mm256_maskz_shldi_epi32() { let a = _mm256_set1_epi32(1); let b = _mm256_set1_epi32(1 << 31); - let r = _mm256_maskz_shldi_epi32(0, a, b, 2); + let r = _mm256_maskz_shldi_epi32::<2>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shldi_epi32(0b11111111, a, b, 2); + let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b); let e = _mm256_set1_epi32(6); assert_eq_m256i(r, e); } @@ -3076,7 +3147,7 @@ mod tests { unsafe fn test_mm_shldi_epi32() { let a = _mm_set1_epi32(1); let b = _mm_set1_epi32(1 << 31); - let r = _mm_shldi_epi32(a, b, 2); + let r = _mm_shldi_epi32::<2>(a, b); let e = _mm_set1_epi32(6); assert_eq_m128i(r, e); } @@ -3085,9 +3156,9 @@ mod tests { unsafe fn test_mm_mask_shldi_epi32() { let a = _mm_set1_epi32(1); let b = _mm_set1_epi32(1 << 31); - let r = _mm_mask_shldi_epi32(a, 0, a, b, 2); + let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shldi_epi32(a, 0b00001111, a, b, 2); + let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b); let e = _mm_set1_epi32(6); assert_eq_m128i(r, e); } @@ -3096,9 +3167,9 @@ mod tests { unsafe fn test_mm_maskz_shldi_epi32() { let a = _mm_set1_epi32(1); let b = _mm_set1_epi32(1 << 31); - let r = _mm_maskz_shldi_epi32(0, a, b, 2); + let r = _mm_maskz_shldi_epi32::<2>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shldi_epi32(0b00001111, a, b, 2); + let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b); let e = _mm_set1_epi32(6); assert_eq_m128i(r, e); } @@ -3107,7 +3178,7 @@ mod tests { unsafe fn test_mm512_shldi_epi16() { let a = _mm512_set1_epi16(1); let b = _mm512_set1_epi16(1 << 15); - let r = _mm512_shldi_epi16(a, b, 2); + let r = _mm512_shldi_epi16::<2>(a, b); let e = _mm512_set1_epi16(6); assert_eq_m512i(r, e); } @@ -3116,9 +3187,9 @@ mod tests { unsafe fn 
test_mm512_mask_shldi_epi16() { let a = _mm512_set1_epi16(1); let b = _mm512_set1_epi16(1 << 15); - let r = _mm512_mask_shldi_epi16(a, 0, a, b, 2); + let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shldi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b, 2); + let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(6); assert_eq_m512i(r, e); } @@ -3127,9 +3198,9 @@ mod tests { unsafe fn test_mm512_maskz_shldi_epi16() { let a = _mm512_set1_epi16(1); let b = _mm512_set1_epi16(1 << 15); - let r = _mm512_maskz_shldi_epi16(0, a, b, 2); + let r = _mm512_maskz_shldi_epi16::<2>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shldi_epi16(0b11111111_11111111_11111111_11111111, a, b, 2); + let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(6); assert_eq_m512i(r, e); } @@ -3138,7 +3209,7 @@ mod tests { unsafe fn test_mm256_shldi_epi16() { let a = _mm256_set1_epi16(1); let b = _mm256_set1_epi16(1 << 15); - let r = _mm256_shldi_epi16(a, b, 2); + let r = _mm256_shldi_epi16::<2>(a, b); let e = _mm256_set1_epi16(6); assert_eq_m256i(r, e); } @@ -3147,9 +3218,9 @@ mod tests { unsafe fn test_mm256_mask_shldi_epi16() { let a = _mm256_set1_epi16(1); let b = _mm256_set1_epi16(1 << 15); - let r = _mm256_mask_shldi_epi16(a, 0, a, b, 2); + let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shldi_epi16(a, 0b11111111_11111111, a, b, 2); + let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b); let e = _mm256_set1_epi16(6); assert_eq_m256i(r, e); } @@ -3158,9 +3229,9 @@ mod tests { unsafe fn test_mm256_maskz_shldi_epi16() { let a = _mm256_set1_epi16(1); let b = _mm256_set1_epi16(1 << 15); - let r = _mm256_maskz_shldi_epi16(0, a, b, 2); + let r = _mm256_maskz_shldi_epi16::<2>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = 
_mm256_maskz_shldi_epi16(0b11111111_11111111, a, b, 2); + let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b); let e = _mm256_set1_epi16(6); assert_eq_m256i(r, e); } @@ -3169,7 +3240,7 @@ mod tests { unsafe fn test_mm_shldi_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(1 << 15); - let r = _mm_shldi_epi16(a, b, 2); + let r = _mm_shldi_epi16::<2>(a, b); let e = _mm_set1_epi16(6); assert_eq_m128i(r, e); } @@ -3178,9 +3249,9 @@ mod tests { unsafe fn test_mm_mask_shldi_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(1 << 15); - let r = _mm_mask_shldi_epi16(a, 0, a, b, 2); + let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shldi_epi16(a, 0b11111111, a, b, 2); + let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b); let e = _mm_set1_epi16(6); assert_eq_m128i(r, e); } @@ -3189,9 +3260,9 @@ mod tests { unsafe fn test_mm_maskz_shldi_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(1 << 15); - let r = _mm_maskz_shldi_epi16(0, a, b, 2); + let r = _mm_maskz_shldi_epi16::<2>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shldi_epi16(0b11111111, a, b, 2); + let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b); let e = _mm_set1_epi16(6); assert_eq_m128i(r, e); } @@ -3200,7 +3271,7 @@ mod tests { unsafe fn test_mm512_shrdi_epi64() { let a = _mm512_set1_epi64(8); let b = _mm512_set1_epi64(2); - let r = _mm512_shrdi_epi64(a, b, 1); + let r = _mm512_shrdi_epi64::<1>(a, b); let e = _mm512_set1_epi64(1); assert_eq_m512i(r, e); } @@ -3209,9 +3280,9 @@ mod tests { unsafe fn test_mm512_mask_shrdi_epi64() { let a = _mm512_set1_epi64(8); let b = _mm512_set1_epi64(2); - let r = _mm512_mask_shrdi_epi64(a, 0, a, b, 1); + let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shrdi_epi64(a, 0b11111111, a, b, 1); + let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b); let e = _mm512_set1_epi64(1); assert_eq_m512i(r, e); } @@ -3220,9 
+3291,9 @@ mod tests { unsafe fn test_mm512_maskz_shrdi_epi64() { let a = _mm512_set1_epi64(8); let b = _mm512_set1_epi64(2); - let r = _mm512_maskz_shrdi_epi64(0, a, b, 1); + let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shrdi_epi64(0b11111111, a, b, 1); + let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b); let e = _mm512_set1_epi64(1); assert_eq_m512i(r, e); } @@ -3231,7 +3302,7 @@ mod tests { unsafe fn test_mm256_shrdi_epi64() { let a = _mm256_set1_epi64x(8); let b = _mm256_set1_epi64x(2); - let r = _mm256_shrdi_epi64(a, b, 1); + let r = _mm256_shrdi_epi64::<1>(a, b); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); } @@ -3240,9 +3311,9 @@ mod tests { unsafe fn test_mm256_mask_shrdi_epi64() { let a = _mm256_set1_epi64x(8); let b = _mm256_set1_epi64x(2); - let r = _mm256_mask_shrdi_epi64(a, 0, a, b, 1); + let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shrdi_epi64(a, 0b00001111, a, b, 1); + let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); } @@ -3251,9 +3322,9 @@ mod tests { unsafe fn test_mm256_maskz_shrdi_epi64() { let a = _mm256_set1_epi64x(8); let b = _mm256_set1_epi64x(2); - let r = _mm256_maskz_shrdi_epi64(0, a, b, 1); + let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shrdi_epi64(0b00001111, a, b, 1); + let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); } @@ -3262,7 +3333,7 @@ mod tests { unsafe fn test_mm_shrdi_epi64() { let a = _mm_set1_epi64x(8); let b = _mm_set1_epi64x(2); - let r = _mm_shrdi_epi64(a, b, 1); + let r = _mm_shrdi_epi64::<1>(a, b); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); } @@ -3271,9 +3342,9 @@ mod tests { unsafe fn test_mm_mask_shrdi_epi64() { let a = _mm_set1_epi64x(8); let b = _mm_set1_epi64x(2); - let r = 
_mm_mask_shrdi_epi64(a, 0, a, b, 1); + let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shrdi_epi64(a, 0b00000011, a, b, 1); + let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); } @@ -3282,9 +3353,9 @@ mod tests { unsafe fn test_mm_maskz_shrdi_epi64() { let a = _mm_set1_epi64x(8); let b = _mm_set1_epi64x(2); - let r = _mm_maskz_shrdi_epi64(0, a, b, 1); + let r = _mm_maskz_shrdi_epi64::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shrdi_epi64(0b00000011, a, b, 1); + let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); } @@ -3293,7 +3364,7 @@ mod tests { unsafe fn test_mm512_shrdi_epi32() { let a = _mm512_set1_epi32(8); let b = _mm512_set1_epi32(2); - let r = _mm512_shrdi_epi32(a, b, 1); + let r = _mm512_shrdi_epi32::<1>(a, b); let e = _mm512_set1_epi32(1); assert_eq_m512i(r, e); } @@ -3302,9 +3373,9 @@ mod tests { unsafe fn test_mm512_mask_shrdi_epi32() { let a = _mm512_set1_epi32(8); let b = _mm512_set1_epi32(2); - let r = _mm512_mask_shrdi_epi32(a, 0, a, b, 1); + let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shrdi_epi32(a, 0b11111111_11111111, a, b, 1); + let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b); let e = _mm512_set1_epi32(1); assert_eq_m512i(r, e); } @@ -3313,9 +3384,9 @@ mod tests { unsafe fn test_mm512_maskz_shrdi_epi32() { let a = _mm512_set1_epi32(8); let b = _mm512_set1_epi32(2); - let r = _mm512_maskz_shrdi_epi32(0, a, b, 1); + let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shrdi_epi32(0b11111111_11111111, a, b, 1); + let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b); let e = _mm512_set1_epi32(1); assert_eq_m512i(r, e); } @@ -3324,7 +3395,7 @@ mod tests { unsafe fn test_mm256_shrdi_epi32() { let a = _mm256_set1_epi32(8); let b = 
_mm256_set1_epi32(2); - let r = _mm256_shrdi_epi32(a, b, 1); + let r = _mm256_shrdi_epi32::<1>(a, b); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); } @@ -3333,9 +3404,9 @@ mod tests { unsafe fn test_mm256_mask_shrdi_epi32() { let a = _mm256_set1_epi32(8); let b = _mm256_set1_epi32(2); - let r = _mm256_mask_shrdi_epi32(a, 0, a, b, 1); + let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shrdi_epi32(a, 0b11111111, a, b, 1); + let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); } @@ -3344,9 +3415,9 @@ mod tests { unsafe fn test_mm256_maskz_shrdi_epi32() { let a = _mm256_set1_epi32(8); let b = _mm256_set1_epi32(2); - let r = _mm256_maskz_shrdi_epi32(0, a, b, 1); + let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shrdi_epi32(0b11111111, a, b, 1); + let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); } @@ -3355,7 +3426,7 @@ mod tests { unsafe fn test_mm_shrdi_epi32() { let a = _mm_set1_epi32(8); let b = _mm_set1_epi32(2); - let r = _mm_shrdi_epi32(a, b, 1); + let r = _mm_shrdi_epi32::<1>(a, b); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); } @@ -3364,9 +3435,9 @@ mod tests { unsafe fn test_mm_mask_shrdi_epi32() { let a = _mm_set1_epi32(8); let b = _mm_set1_epi32(2); - let r = _mm_mask_shrdi_epi32(a, 0, a, b, 1); + let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shrdi_epi32(a, 0b00001111, a, b, 1); + let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); } @@ -3375,9 +3446,9 @@ mod tests { unsafe fn test_mm_maskz_shrdi_epi32() { let a = _mm_set1_epi32(8); let b = _mm_set1_epi32(2); - let r = _mm_maskz_shrdi_epi32(0, a, b, 1); + let r = _mm_maskz_shrdi_epi32::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = 
_mm_maskz_shrdi_epi32(0b00001111, a, b, 1); + let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); } @@ -3386,7 +3457,7 @@ mod tests { unsafe fn test_mm512_shrdi_epi16() { let a = _mm512_set1_epi16(8); let b = _mm512_set1_epi16(2); - let r = _mm512_shrdi_epi16(a, b, 1); + let r = _mm512_shrdi_epi16::<1>(a, b); let e = _mm512_set1_epi16(1); assert_eq_m512i(r, e); } @@ -3395,9 +3466,9 @@ mod tests { unsafe fn test_mm512_mask_shrdi_epi16() { let a = _mm512_set1_epi16(8); let b = _mm512_set1_epi16(2); - let r = _mm512_mask_shrdi_epi16(a, 0, a, b, 1); + let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_shrdi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b, 1); + let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(1); assert_eq_m512i(r, e); } @@ -3406,9 +3477,9 @@ mod tests { unsafe fn test_mm512_maskz_shrdi_epi16() { let a = _mm512_set1_epi16(8); let b = _mm512_set1_epi16(2); - let r = _mm512_maskz_shrdi_epi16(0, a, b, 1); + let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_shrdi_epi16(0b11111111_11111111_11111111_11111111, a, b, 1); + let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(1); assert_eq_m512i(r, e); } @@ -3417,7 +3488,7 @@ mod tests { unsafe fn test_mm256_shrdi_epi16() { let a = _mm256_set1_epi16(8); let b = _mm256_set1_epi16(2); - let r = _mm256_shrdi_epi16(a, b, 1); + let r = _mm256_shrdi_epi16::<1>(a, b); let e = _mm256_set1_epi16(1); assert_eq_m256i(r, e); } @@ -3426,9 +3497,9 @@ mod tests { unsafe fn test_mm256_mask_shrdi_epi16() { let a = _mm256_set1_epi16(8); let b = _mm256_set1_epi16(2); - let r = _mm256_mask_shrdi_epi16(a, 0, a, b, 1); + let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_shrdi_epi16(a, 0b11111111_11111111, a, 
b, 1); + let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b); let e = _mm256_set1_epi16(1); assert_eq_m256i(r, e); } @@ -3437,9 +3508,9 @@ mod tests { unsafe fn test_mm256_maskz_shrdi_epi16() { let a = _mm256_set1_epi16(8); let b = _mm256_set1_epi16(2); - let r = _mm256_maskz_shrdi_epi16(0, a, b, 1); + let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shrdi_epi16(0b11111111_11111111, a, b, 1); + let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b); let e = _mm256_set1_epi16(1); assert_eq_m256i(r, e); } @@ -3448,7 +3519,7 @@ mod tests { unsafe fn test_mm_shrdi_epi16() { let a = _mm_set1_epi16(8); let b = _mm_set1_epi16(2); - let r = _mm_shrdi_epi16(a, b, 1); + let r = _mm_shrdi_epi16::<1>(a, b); let e = _mm_set1_epi16(1); assert_eq_m128i(r, e); } @@ -3457,9 +3528,9 @@ mod tests { unsafe fn test_mm_mask_shrdi_epi16() { let a = _mm_set1_epi16(8); let b = _mm_set1_epi16(2); - let r = _mm_mask_shrdi_epi16(a, 0, a, b, 1); + let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_shrdi_epi16(a, 0b11111111, a, b, 1); + let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b); let e = _mm_set1_epi16(1); assert_eq_m128i(r, e); } @@ -3468,9 +3539,9 @@ mod tests { unsafe fn test_mm_maskz_shrdi_epi16() { let a = _mm_set1_epi16(8); let b = _mm_set1_epi16(2); - let r = _mm_maskz_shrdi_epi16(0, a, b, 1); + let r = _mm_maskz_shrdi_epi16::<1>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shrdi_epi16(0b11111111, a, b, 1); + let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b); let e = _mm_set1_epi16(1); assert_eq_m128i(r, e); } From 59a5d0ea07302c468ef319b5253ea6f4be68a209 Mon Sep 17 00:00:00 2001 From: jironglin Date: Fri, 5 Mar 2021 00:33:09 +0000 Subject: [PATCH 044/123] ror_epi32,epi64, rol_epi32_epi64, srai_epi32 --- crates/core_arch/src/x86/avx512f.rs | 760 +++++++++++-------------- crates/core_arch/src/x86_64/avx512f.rs | 60 +- 2 
files changed, 360 insertions(+), 460 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 5abe23e093..5e5104b618 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -16624,16 +16624,12 @@ pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=4685) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_rol_epi32(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprold(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprold(a, IMM8); transmute(r) } @@ -16642,17 +16638,17 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=4683) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_rol_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vprold(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i32x16())) + let r = vprold(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -16660,18 +16656,14 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=4684) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprold(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprold(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
@@ -16679,16 +16671,12 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rol_epi32&expand=4682) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_rol_epi32(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_rol_epi32(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprold256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprold256(a, IMM8); transmute(r) } @@ -16697,17 +16685,17 @@ pub unsafe fn _mm256_rol_epi32(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rol_epi32&expand=4680) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_rol_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprold256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i32x8())) + let r = vprold256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16715,18 +16703,14 @@ pub unsafe fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rol_epi32&expand=4681) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprold256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprold256(a, IMM8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -16734,16 +16718,12 @@ pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rol_epi32&expand=4679) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_rol_epi32(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_rol_epi32(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprold128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprold128(a, IMM8); transmute(r) } @@ -16752,17 +16732,17 @@ pub unsafe fn _mm_rol_epi32(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rol_epi32&expand=4677) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_rol_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprold128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i32x4())) + let r = vprold128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16770,18 +16750,14 @@ pub unsafe fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rol_epi32&expand=4678) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprold128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprold128(a, IMM8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -16789,16 +16765,12 @@ pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi32&expand=4721) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_ror_epi32(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vprord(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprord(a, IMM8); transmute(r) } @@ -16807,17 +16779,17 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi32&expand=4719) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_ror_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprord(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i32x16())) + let r = vprord(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16825,18 +16797,14 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi32&expand=4720) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vprord(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprord(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -16844,16 +16812,12 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ror_epi32&expand=4718) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_ror_epi32(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_ror_epi32(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprord256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprord256(a, IMM8); transmute(r) } @@ -16862,17 +16826,17 @@ pub unsafe fn _mm256_ror_epi32(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_ror_epi32&expand=4716) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_ror_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprord256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i32x8())) + let r = vprord256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16880,18 +16844,14 @@ pub unsafe fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_ror_epi32&expand=4717) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vprord256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprord256(a, IMM8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -16899,16 +16859,12 @@ pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ror_epi32&expand=4715) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_ror_epi32(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_ror_epi32(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprord128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprord128(a, IMM8); transmute(r) } @@ -16917,17 +16873,17 @@ pub unsafe fn _mm_ror_epi32(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_ror_epi32&expand=4713) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_ror_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprord128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i32x4())) + let r = vprord128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -16935,18 +16891,14 @@ pub unsafe fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_ror_epi32&expand=4714) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vprord128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprord128(a, IMM8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -16954,16 +16906,12 @@ pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=4694) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_rol_epi64(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprolq(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprolq(a, IMM8); transmute(r) } @@ -16972,17 +16920,17 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=4692) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_rol_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vprolq(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i64x8())) + let r = vprolq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -16990,18 +16938,14 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=4693) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprolq(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprolq(a, IMM8); let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -17009,16 +16953,12 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rol_epi64&expand=4691) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_rol_epi64(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_rol_epi64(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprolq256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprolq256(a, IMM8); transmute(r) } @@ -17027,17 +16967,17 @@ pub unsafe fn _mm256_rol_epi64(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rol_epi64&expand=4689) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_rol_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprolq256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i64x4())) + let r = vprolq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17045,18 +16985,14 @@ pub unsafe fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rol_epi64&expand=4690) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprolq256(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprolq256(a, IMM8); let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
@@ -17064,16 +17000,12 @@ pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rol_epi64&expand=4688) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_rol_epi64(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_rol_epi64(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprolq128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprolq128(a, IMM8); transmute(r) } @@ -17082,17 +17014,17 @@ pub unsafe fn _mm_rol_epi64(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rol_epi64&expand=4686) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_rol_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprolq128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, rol, src.as_i64x2())) + let r = vprolq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -17100,18 +17032,14 @@ pub unsafe fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rol_epi64&expand=4687) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprolq128(a, $imm8) - }; - } - let rol = constify_imm8_sae!(imm8, call); + let r = vprolq128(a, IMM8); let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -17119,16 +17047,12 @@ pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=4730) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_ror_epi64(a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprorq(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprorq(a, IMM8); transmute(r) } @@ -17137,17 +17061,17 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=4728) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_ror_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vprorq(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i64x8())) + let r = vprorq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17155,18 +17079,14 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=4729) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vprorq(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprorq(a, IMM8); let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -17174,16 +17094,12 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ror_epi64&expand=4727) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_ror_epi64(a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_ror_epi64(a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprorq256(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprorq256(a, IMM8); transmute(r) } @@ -17192,17 +17108,17 @@ pub unsafe fn _mm256_ror_epi64(a: __m256i, imm8: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_ror_epi64&expand=4725) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_ror_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! 
call { - ($imm8:expr) => { - vprorq256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i64x4())) + let r = vprorq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17210,18 +17126,14 @@ pub unsafe fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_ror_epi64&expand=4726) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vprorq256(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprorq256(a, IMM8); let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. 
@@ -17229,16 +17141,12 @@ pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ror_epi64&expand=4724) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_ror_epi64(a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_ror_epi64(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprorq128(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vprorq128(a, IMM8); transmute(r) } @@ -17247,17 +17155,17 @@ pub unsafe fn _mm_ror_epi64(a: __m128i, imm8: i32) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_ror_epi64&expand=4722) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_ror_epi64( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprorq128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ror, src.as_i64x2())) + let r = vprorq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -17265,18 +17173,14 @@ pub unsafe fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i3 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_ror_epi64&expand=4723) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vprorq128(a, $imm8) - }; - } - let ror = constify_imm8_sae!(imm8, call); + let r = vprorq128(a, IMM8); let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
@@ -18343,7 +18247,7 @@ pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __ pub unsafe fn _mm512_srai_epi32(a: __m512i) -> __m512i { static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - let r = vpsraid(a, IMM8); + let r = vpsraid512(a, IMM8); transmute(r) } @@ -18361,7 +18265,7 @@ pub unsafe fn _mm512_mask_srai_epi32( ) -> __m512i { static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - let r = vpsraid(a, IMM8); + let r = vpsraid512(a, IMM8); transmute(simd_select_bitmask(k, r, src.as_i32x16())) } @@ -18375,7 +18279,7 @@ pub unsafe fn _mm512_mask_srai_epi32( pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { static_assert_imm8u!(IMM8); let a = a.as_i32x16(); - let r = vpsraid(a, IMM8); + let r = vpsraid512(a, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, r, zero)) } @@ -18385,16 +18289,16 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srai_epi32&expand=5431) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_srai_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x8(), src.as_i32x8())) +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_srai_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, +) -> __m256i { + let imm8 = IMM8 as i32; + let r = psraid256(a.as_i32x8(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18402,17 +18306,13 @@ pub unsafe fn _mm256_mask_srai_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srai_epi32&expand=5432) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i) -> __m256i { + let imm8 = IMM8 as i32; + let r = psraid256(a.as_i32x8(), imm8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -18420,16 +18320,16 @@ pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srai_epi32&expand=5428) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, shf.as_i32x4(), src.as_i32x4())) +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_srai_epi32( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + let imm8 = IMM8 as i32; + let r = psraid128(a.as_i32x4(), imm8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18437,17 +18337,13 @@ pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srai_epi32&expand=5429) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_srai_epi32::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i) -> __m128i { + let imm8 = IMM8 as i32; + let r = psraid128(a.as_i32x4(), imm8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -37615,7 +37511,11 @@ extern "C" { fn vpsraq128(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.avx512.psrai.d.512"] - fn vpsraid(a: i32x16, imm8: u32) -> i32x16; + fn vpsraid512(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx2.psrai.d"] + fn psraid256(a: i32x8, imm8: i32) -> i32x8; + #[link_name = "llvm.x86.sse2.psrai.d"] + fn psraid128(a: i32x4, imm8: i32) -> i32x4; #[link_name = "llvm.x86.avx512.psrai.q.512"] fn vpsraiq(a: i64x8, imm8: u32) -> i64x8; @@ -45648,7 +45548,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rol_epi32() { let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r = _mm512_rol_epi32(a, 1); + let r = _mm512_rol_epi32::<1>(a); let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } @@ -45656,9 +45556,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rol_epi32() { let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r = _mm512_mask_rol_epi32(a, 0, a, 1); + let r = _mm512_mask_rol_epi32::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1); + let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a); let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } @@ -45666,9 +45566,9 @@ 
mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rol_epi32() { let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); - let r = _mm512_maskz_rol_epi32(0, a, 1); + let r = _mm512_maskz_rol_epi32::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1); + let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a); let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); assert_eq_m512i(r, e); } @@ -45676,7 +45576,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_rol_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_rol_epi32(a, 1); + let r = _mm256_rol_epi32::<1>(a); let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -45684,9 +45584,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_rol_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_mask_rol_epi32(a, 0, a, 1); + let r = _mm256_mask_rol_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_rol_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -45694,9 +45594,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_rol_epi32() { let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); - let r = _mm256_maskz_rol_epi32(0, a, 1); + let r = _mm256_maskz_rol_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_rol_epi32(0b11111111, a, 1); + let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); assert_eq_m256i(r, e); } @@ -45704,7 +45604,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_rol_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_rol_epi32(a, 1); 
+ let r = _mm_rol_epi32::<1>(a); let e = _mm_set_epi32(1 << 0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -45712,9 +45612,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_rol_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_mask_rol_epi32(a, 0, a, 1); + let r = _mm_mask_rol_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_rol_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a); let e = _mm_set_epi32(1 << 0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -45722,9 +45622,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_rol_epi32() { let a = _mm_set_epi32(1 << 31, 1, 1, 1); - let r = _mm_maskz_rol_epi32(0, a, 1); + let r = _mm_maskz_rol_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_rol_epi32(0b00001111, a, 1); + let r = _mm_maskz_rol_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(1 << 0, 2, 2, 2); assert_eq_m128i(r, e); } @@ -45732,7 +45632,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_ror_epi32() { let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let r = _mm512_ror_epi32(a, 1); + let r = _mm512_ror_epi32::<1>(a); let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -45740,9 +45640,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_ror_epi32() { let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let r = _mm512_mask_ror_epi32(a, 0, a, 1); + let r = _mm512_mask_ror_epi32::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1); + let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a); let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -45750,9 +45650,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_ror_epi32() { let a = 
_mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); - let r = _mm512_maskz_ror_epi32(0, a, 1); + let r = _mm512_maskz_ror_epi32::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1); + let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a); let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); assert_eq_m512i(r, e); } @@ -45760,7 +45660,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_ror_epi32() { let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); - let r = _mm256_ror_epi32(a, 1); + let r = _mm256_ror_epi32::<1>(a); let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); assert_eq_m256i(r, e); } @@ -45768,9 +45668,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_ror_epi32() { let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); - let r = _mm256_mask_ror_epi32(a, 0, a, 1); + let r = _mm256_mask_ror_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_ror_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); assert_eq_m256i(r, e); } @@ -45778,9 +45678,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_ror_epi32() { let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2); - let r = _mm256_maskz_ror_epi32(0, a, 1); + let r = _mm256_maskz_ror_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_ror_epi32(0b11111111, a, 1); + let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1); assert_eq_m256i(r, e); } @@ -45788,7 +45688,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_ror_epi32() { let a = _mm_set_epi32(1 << 0, 2, 2, 2); - let r = _mm_ror_epi32(a, 1); + let r = _mm_ror_epi32::<1>(a); let e = _mm_set_epi32(1 << 31, 1, 1, 1); assert_eq_m128i(r, 
e); } @@ -45796,9 +45696,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_ror_epi32() { let a = _mm_set_epi32(1 << 0, 2, 2, 2); - let r = _mm_mask_ror_epi32(a, 0, a, 1); + let r = _mm_mask_ror_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_ror_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a); let e = _mm_set_epi32(1 << 31, 1, 1, 1); assert_eq_m128i(r, e); } @@ -45806,9 +45706,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_ror_epi32() { let a = _mm_set_epi32(1 << 0, 2, 2, 2); - let r = _mm_maskz_ror_epi32(0, a, 1); + let r = _mm_maskz_ror_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_ror_epi32(0b00001111, a, 1); + let r = _mm_maskz_ror_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(1 << 31, 1, 1, 1); assert_eq_m128i(r, e); } @@ -46664,9 +46564,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_srai_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); - let r = _mm256_mask_srai_epi32(a, 0, a, 1); + let r = _mm256_mask_srai_epi32::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_srai_epi32(a, 0b11111111, a, 1); + let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a); let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -46674,9 +46574,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_srai_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); - let r = _mm256_maskz_srai_epi32(0, a, 1); + let r = _mm256_maskz_srai_epi32::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_srai_epi32(0b11111111, a, 1); + let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a); let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0); assert_eq_m256i(r, e); } @@ -46684,9 +46584,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_srai_epi32() { let a = 
_mm_set_epi32(1 << 5, 0, 0, 0); - let r = _mm_mask_srai_epi32(a, 0, a, 1); + let r = _mm_mask_srai_epi32::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_srai_epi32(a, 0b00001111, a, 1); + let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a); let e = _mm_set_epi32(1 << 4, 0, 0, 0); assert_eq_m128i(r, e); } @@ -46694,9 +46594,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_srai_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); - let r = _mm_maskz_srai_epi32(0, a, 1); + let r = _mm_maskz_srai_epi32::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_srai_epi32(0b00001111, a, 1); + let r = _mm_maskz_srai_epi32::<1>(0b00001111, a); let e = _mm_set_epi32(1 << 4, 0, 0, 0); assert_eq_m128i(r, e); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index af62b2112c..84eab28e34 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -7627,7 +7627,7 @@ mod tests { 1 << 63, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_rol_epi64(a, 1); + let r = _mm512_rol_epi64::<1>(a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 0, 1 << 33, 1 << 33, 1 << 33, @@ -7643,9 +7643,9 @@ mod tests { 1 << 63, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_mask_rol_epi64(a, 0, a, 1); + let r = _mm512_mask_rol_epi64::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1); + let r = _mm512_mask_rol_epi64::<1>(a, 0b11111111, a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 0, 1 << 33, 1 << 33, 1 << 33, @@ -7661,9 +7661,9 @@ mod tests { 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 63, ); - let r = _mm512_maskz_rol_epi64(0, a, 1); + let r = _mm512_maskz_rol_epi64::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_rol_epi64(0b00001111, a, 1); + let r = _mm512_maskz_rol_epi64::<1>(0b00001111, a); 
let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0); assert_eq_m512i(r, e); } @@ -7671,7 +7671,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_rol_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_rol_epi64(a, 1); + let r = _mm256_rol_epi64::<1>(a); let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7679,9 +7679,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_rol_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_mask_rol_epi64(a, 0, a, 1); + let r = _mm256_mask_rol_epi64::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_rol_epi64(a, 0b00001111, a, 1); + let r = _mm256_mask_rol_epi64::<1>(a, 0b00001111, a); let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7689,9 +7689,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_rol_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_maskz_rol_epi64(0, a, 1); + let r = _mm256_maskz_rol_epi64::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_rol_epi64(0b00001111, a, 1); + let r = _mm256_maskz_rol_epi64::<1>(0b00001111, a); let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); assert_eq_m256i(r, e); } @@ -7699,7 +7699,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_rol_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_rol_epi64(a, 1); + let r = _mm_rol_epi64::<1>(a); let e = _mm_set_epi64x(1 << 0, 1 << 33); assert_eq_m128i(r, e); } @@ -7707,9 +7707,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_rol_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_mask_rol_epi64(a, 0, a, 1); + let r = _mm_mask_rol_epi64::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = 
_mm_mask_rol_epi64(a, 0b00000011, a, 1); + let r = _mm_mask_rol_epi64::<1>(a, 0b00000011, a); let e = _mm_set_epi64x(1 << 0, 1 << 33); assert_eq_m128i(r, e); } @@ -7717,9 +7717,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_rol_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); - let r = _mm_maskz_rol_epi64(0, a, 1); + let r = _mm_maskz_rol_epi64::<1>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_rol_epi64(0b00000011, a, 1); + let r = _mm_maskz_rol_epi64::<1>(0b00000011, a); let e = _mm_set_epi64x(1 << 0, 1 << 33); assert_eq_m128i(r, e); } @@ -7731,7 +7731,7 @@ mod tests { 1 << 0, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_ror_epi64(a, 1); + let r = _mm512_ror_epi64::<1>(a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 63, 1 << 31, 1 << 31, 1 << 31, @@ -7747,9 +7747,9 @@ mod tests { 1 << 0, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, ); - let r = _mm512_mask_ror_epi64(a, 0, a, 1); + let r = _mm512_mask_ror_epi64::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1); + let r = _mm512_mask_ror_epi64::<1>(a, 0b11111111, a); #[rustfmt::skip] let e = _mm512_set_epi64( 1 << 63, 1 << 31, 1 << 31, 1 << 31, @@ -7765,9 +7765,9 @@ mod tests { 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 32, 1 << 0, ); - let r = _mm512_maskz_ror_epi64(0, a, 1); + let r = _mm512_maskz_ror_epi64::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_ror_epi64(0b00001111, a, 1); + let r = _mm512_maskz_ror_epi64::<1>(0b00001111, a); let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63); assert_eq_m512i(r, e); } @@ -7775,7 +7775,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_ror_epi64() { let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_ror_epi64(a, 1); + let r = _mm256_ror_epi64::<1>(a); let e = _mm256_set_epi64x(1 << 63, 1 
<< 31, 1 << 31, 1 << 31); assert_eq_m256i(r, e); } @@ -7783,9 +7783,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_ror_epi64() { let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_mask_ror_epi64(a, 0, a, 1); + let r = _mm256_mask_ror_epi64::<1>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_ror_epi64(a, 0b00001111, a, 1); + let r = _mm256_mask_ror_epi64::<1>(a, 0b00001111, a); let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); assert_eq_m256i(r, e); } @@ -7793,9 +7793,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_ror_epi64() { let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); - let r = _mm256_maskz_ror_epi64(0, a, 1); + let r = _mm256_maskz_ror_epi64::<1>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_ror_epi64(0b00001111, a, 1); + let r = _mm256_maskz_ror_epi64::<1>(0b00001111, a); let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); assert_eq_m256i(r, e); } @@ -7803,7 +7803,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_ror_epi64() { let a = _mm_set_epi64x(1 << 0, 1 << 32); - let r = _mm_ror_epi64(a, 1); + let r = _mm_ror_epi64::<1>(a); let e = _mm_set_epi64x(1 << 63, 1 << 31); assert_eq_m128i(r, e); } @@ -7811,9 +7811,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_ror_epi64() { let a = _mm_set_epi64x(1 << 0, 1 << 32); - let r = _mm_mask_ror_epi64(a, 0, a, 1); + let r = _mm_mask_ror_epi64::<1>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_ror_epi64(a, 0b00000011, a, 1); + let r = _mm_mask_ror_epi64::<1>(a, 0b00000011, a); let e = _mm_set_epi64x(1 << 63, 1 << 31); assert_eq_m128i(r, e); } @@ -7821,9 +7821,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_ror_epi64() { let a = _mm_set_epi64x(1 << 0, 1 << 32); - let r = _mm_maskz_ror_epi64(0, a, 1); + let r = _mm_maskz_ror_epi64::<1>(0, a); 
assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_ror_epi64(0b00000011, a, 1); + let r = _mm_maskz_ror_epi64::<1>(0b00000011, a); let e = _mm_set_epi64x(1 << 63, 1 << 31); assert_eq_m128i(r, e); } From 2cbe7d9f3ef9815c885674f5434a3c2b12fc3ccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= Date: Wed, 3 Mar 2021 00:00:00 +0000 Subject: [PATCH 045/123] Convert _mm256_insert_epi64 to const generics --- crates/core_arch/src/x86/avx2.rs | 6 +++--- crates/core_arch/src/x86/test.rs | 6 ++++-- crates/core_arch/src/x86_64/avx.rs | 15 +++++---------- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index c98c1d8005..785b0fe9bb 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -4484,7 +4484,7 @@ mod tests { let a = _mm256_setr_epi64x(0, 1, 2, 3); let b = _mm256_setr_epi64x(3, 2, 2, 0); let r = _mm256_cmpeq_epi64(a, b); - assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2)); + assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0)); } #[simd_test(enable = "avx2")] @@ -4513,10 +4513,10 @@ mod tests { #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpgt_epi64() { - let a = _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0); + let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5); let b = _mm256_set1_epi64x(0); let r = _mm256_cmpgt_epi64(a, b); - assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0)); + assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0)); } #[simd_test(enable = "avx2")] diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs index 0784e37524..9f577972fa 100644 --- a/crates/core_arch/src/x86/test.rs +++ b/crates/core_arch/src/x86/test.rs @@ -104,14 +104,16 @@ mod x86_polyfill { } #[target_feature(enable = "avx2")] - pub unsafe fn _mm256_insert_epi64(a: __m256i, val: i64, idx: i32) -> __m256i { + 
#[rustc_legacy_const_generics(2)] + pub unsafe fn _mm256_insert_epi64(a: __m256i, val: i64) -> __m256i { + static_assert_imm2!(INDEX); #[repr(C)] union A { a: __m256i, b: [i64; 4], } let mut a = A { a }; - a.b[idx as usize] = val; + a.b[INDEX as usize] = val; a.a } } diff --git a/crates/core_arch/src/x86_64/avx.rs b/crates/core_arch/src/x86_64/avx.rs index fd82367714..7ba26371c6 100644 --- a/crates/core_arch/src/x86_64/avx.rs +++ b/crates/core_arch/src/x86_64/avx.rs @@ -23,18 +23,13 @@ use crate::{ /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64) #[inline] -#[rustc_args_required_const(2)] +#[rustc_legacy_const_generics(2)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i { - let a = a.as_i64x4(); - match index & 3 { - 0 => transmute(simd_insert(a, 0, i)), - 1 => transmute(simd_insert(a, 1, i)), - 2 => transmute(simd_insert(a, 2, i)), - _ => transmute(simd_insert(a, 3, i)), - } +pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64) -> __m256i { + static_assert_imm2!(INDEX); + transmute(simd_insert(a.as_i64x4(), INDEX as u32, i)) } #[cfg(test)] @@ -46,7 +41,7 @@ mod tests { #[simd_test(enable = "avx")] unsafe fn test_mm256_insert_epi64() { let a = _mm256_setr_epi64x(1, 2, 3, 4); - let r = _mm256_insert_epi64(a, 0, 3); + let r = _mm256_insert_epi64::<3>(a, 0); let e = _mm256_setr_epi64x(1, 2, 3, 0); assert_eq_m256i(r, e); } From ca5fed4042c9958180b7e4d609697e827e3f54b3 Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Thu, 4 Mar 2021 21:21:46 +0800 Subject: [PATCH 046/123] Modify stdarch-gen to generate instructions with a single parameter and add vceqz instructions --- .../core_arch/src/aarch64/neon/generated.rs | 336 ++++++++++++++++-- crates/core_arch/src/arm/neon/generated.rs | 96 ++--- crates/core_arch/src/macros.rs | 4 +- 
crates/stdarch-gen/neon.spec | 20 ++ crates/stdarch-gen/src/main.rs | 116 ++++-- 5 files changed, 472 insertions(+), 100 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index db76c8721d..a74aa578d8 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -73,6 +73,150 @@ pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { simd_eq(a, b) } +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_s8(a: int8x8_t) -> uint8x8_t { + simd_eq(a, int8x8_t(0, 0, 0, 0, 0, 0, 0, 0)) +} + +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_s8(a: int8x16_t) -> uint8x16_t { + simd_eq(a, int8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) +} + +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_s16(a: int16x4_t) -> uint16x4_t { + simd_eq(a, int16x4_t(0, 0, 0, 0)) +} + +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_s16(a: int16x8_t) -> uint16x8_t { + simd_eq(a, int16x8_t(0, 0, 0, 0, 0, 0, 0, 0)) +} + +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_s32(a: int32x2_t) -> uint32x2_t { + simd_eq(a, int32x2_t(0, 0)) +} + +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_s32(a: int32x4_t) -> uint32x4_t { + simd_eq(a, int32x4_t(0, 0, 0, 0)) +} + +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_s64(a: int64x1_t) -> uint64x1_t { + simd_eq(a, int64x1_t(0)) +} + +/// Signed Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t { + simd_eq(a, int64x2_t(0, 0)) +} + +/// Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_p64(a: poly64x1_t) -> uint64x1_t { + simd_eq(a, poly64x1_t(0)) +} + +/// Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t { + simd_eq(a, poly64x2_t(0, 0)) +} + +/// Unsigned Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_u8(a: uint8x8_t) -> uint8x8_t { + simd_eq(a, uint8x8_t(0, 0, 0, 0, 0, 0, 0, 0)) +} + +/// Unsigned Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_u8(a: uint8x16_t) -> uint8x16_t { + simd_eq(a, uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) +} + +/// Unsigned Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_u16(a: uint16x4_t) -> uint16x4_t { + simd_eq(a, uint16x4_t(0, 0, 0, 0)) +} + +/// Unsigned Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_u16(a: uint16x8_t) -> uint16x8_t { + simd_eq(a, uint16x8_t(0, 0, 0, 0, 0, 0, 0, 0)) +} + +/// Unsigned Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_u32(a: uint32x2_t) -> uint32x2_t { + simd_eq(a, uint32x2_t(0, 0)) +} + +/// Unsigned 
Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t { + simd_eq(a, uint32x4_t(0, 0, 0, 0)) +} + +/// Unsigned Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqz_u64(a: uint64x1_t) -> uint64x1_t { + simd_eq(a, uint64x1_t(0)) +} + +/// Unsigned Compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(vceqz))] +pub unsafe fn vceqzq_u64(a: uint64x2_t) -> uint64x2_t { + simd_eq(a, uint64x2_t(0, 0)) +} + /// Compare signed greater than #[inline] #[target_feature(enable = "neon")] @@ -358,14 +502,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_u64() { - let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(0); let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(0); let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); assert_eq!(r, e); @@ -373,14 +517,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_u64() { - let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01); - let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01); + let a: u64x2 = u64x2::new(0, 0x01); + let b: u64x2 = u64x2::new(0, 0x01); let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let b: 
u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let a: u64x2 = u64x2::new(0, 0); + let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); assert_eq!(r, e); @@ -388,14 +532,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_s64() { - let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let a: i64x1 = i64x1::new(-9223372036854775808); + let b: i64x1 = i64x1::new(-9223372036854775808); let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let a: i64x1 = i64x1::new(-9223372036854775808); + let b: i64x1 = i64x1::new(-9223372036854775808); let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); assert_eq!(r, e); @@ -403,14 +547,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_s64() { - let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); - let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); + let a: i64x2 = i64x2::new(-9223372036854775808, 0x01); + let b: i64x2 = i64x2::new(-9223372036854775808, 0x01); let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); + let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808); + let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); let r: u64x2 = transmute(vceqq_s64(transmute(a), 
transmute(b))); assert_eq!(r, e); @@ -418,14 +562,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_p64() { - let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let a: i64x1 = i64x1::new(-9223372036854775808); + let b: i64x1 = i64x1::new(-9223372036854775808); let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let a: i64x1 = i64x1::new(-9223372036854775808); + let b: i64x1 = i64x1::new(-9223372036854775808); let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); assert_eq!(r, e); @@ -433,14 +577,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_p64() { - let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); - let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); + let a: i64x2 = i64x2::new(-9223372036854775808, 0x01); + let b: i64x2 = i64x2::new(-9223372036854775808, 0x01); let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); + let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808); + let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); assert_eq!(r, e); @@ -464,6 +608,150 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0, 0xFF, 0, 
0, 0, 0, 0, 0); + let r: u8x8 = transmute(vceqz_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vceqzq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s16() { + let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); + let r: u16x4 = transmute(vceqz_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vceqzq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x00); + let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceqz_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); + let r: u32x4 = transmute(vceqzq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vceqz_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqzq_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vceqz_p64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vceqz_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_p64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); + let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqzq_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vceqz_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vceqzq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0); + let r: u16x4 = transmute(vceqz_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vceqzq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceqz_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 
0); + let r: u32x4 = transmute(vceqzq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_u64() { + let a: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceqz_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_u64() { + let a: u64x2 = u64x2::new(0, 0x00); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqzq_u64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vcgt_s64() { let a: i64x1 = i64x1::new(1); diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs index c60ad9cc50..da8fcefcf6 100644 --- a/crates/core_arch/src/arm/neon/generated.rs +++ b/crates/core_arch/src/arm/neon/generated.rs @@ -3455,14 +3455,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_u8() { - let a: u8x8 = u8x8::new(0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let a: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u8x8 = u8x8::new(0xFF, 0xFF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0xFF, 0, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let a: u8x8 = u8x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3470,14 +3470,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_u8() { - let a: u8x16 = u8x16::new(0xFF, 
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0); - let b: u8x16 = u8x16::new(0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0); + let a: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + let b: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u8x16 = u8x16::new(0xFF, 0xFF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0); - let b: u8x16 = u8x16::new(0xFF, 0, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0xFF); + let a: u8x16 = u8x16::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0xFF); + let b: u8x16 = u8x16::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0); let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3485,14 +3485,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_u16() { - let a: u16x4 = u16x4::new(0xFF_FF, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0xFF_FF, 0x01, 0x02, 0x03); + let a: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0x02, 0x03); - let b: u16x4 = u16x4::new(0xFF_FF, 0, 0x02, 0x04); + let a: u16x4 = u16x4::new(0, 0, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0xFF_FF, 0x02, 0x04); let e: u16x4 = 
u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3500,14 +3500,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_u16() { - let a: u16x8 = u16x8::new(0xFF_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0xFF_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let a: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0xFF_FF, 0, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let a: u16x8 = u16x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0xFF_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3515,14 +3515,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_u32() { - let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0x01); - let b: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0x01); + let a: u32x2 = u32x2::new(0, 0x01); + let b: u32x2 = u32x2::new(0, 0x01); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let b: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let a: u32x2 = u32x2::new(0, 0); + let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3530,14 +3530,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_u32() { - let a: u32x4 
= u32x4::new(0xFF_FF_FF_FF, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0x01, 0x02, 0x03); + let a: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); assert_eq!(r, e); - let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0x02, 0x03); - let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0x02, 0x04); + let a: u32x4 = u32x4::new(0, 0, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x02, 0x04); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3545,14 +3545,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_s8() { - let a: i8x8 = i8x8::new(0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i8x8 = i8x8::new(0x7F, 0x7F, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x7F, -128, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3560,14 +3560,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_s8() { - let a: i8x16 = i8x16::new(0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, -128); - let b: i8x16 = i8x16::new(0x7F, 
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, -128); + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i8x16 = i8x16::new(0x7F, 0x7F, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, -128); - let b: i8x16 = i8x16::new(0x7F, -128, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0x7F); + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3575,14 +3575,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x7F_FF, 0x01, 0x02, 0x03); + let a: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x7F_FF, -32768, 0x02, 0x04); + let a: i16x4 = i16x4::new(-32768, -32768, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x7F_FF, 0x02, 0x04); let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); let r: u16x4 = 
transmute(vceq_s16(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3590,14 +3590,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceqq_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x7F_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let a: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x7F_FF, -32768, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let a: i16x8 = i16x8::new(-32768, -32768, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(-32768, 0x7F_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3605,14 +3605,14 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vceq_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x01); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x01); + let a: i32x2 = i32x2::new(-2147483648, 0x01); + let b: i32x2 = i32x2::new(-2147483648, 0x01); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648); + let a: i32x2 = i32x2::new(-2147483648, -2147483648); + let b: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); assert_eq!(r, e); @@ -3620,14 +3620,14 @@ mod test { #[simd_test(enable = "neon")] 
unsafe fn test_vceqq_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x01, 0x02, 0x03); + let a: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); assert_eq!(r, e); - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 0x02, 0x04); + let a: i32x4 = i32x4::new(-2147483648, -2147483648, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 0x02, 0x04); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); assert_eq!(r, e); diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index 87e49fba4b..5d05adfa6a 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -426,13 +426,13 @@ macro_rules! constify_imm3 { macro_rules! types { ($( $(#[$doc:meta])* - pub struct $name:ident($($fields:tt)*); + pub struct $name:ident($field:ty$(, $fields:ty)*$(,)?); )*) => ($( $(#[$doc])* #[derive(Copy, Clone, Debug)] #[allow(non_camel_case_types)] #[repr(simd)] #[allow(clippy::missing_inline_in_public_items)] - pub struct $name($($fields)*); + pub struct $name(pub $field$(, pub $fields)*); )*) } diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index 5c705c15db..7805df501b 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -136,6 +136,26 @@ arm = vceq. 
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t +/// Signed Compare bitwise equal to zero +name = vceqz +fn = simd_eq +a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX +fixed = 0 +validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE + +aarch64 = vceqz +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t + +/// Unsigned Compare bitwise equal to zero +name = vceqz +fn = simd_eq +a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX +fixed = 0 +validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE + +aarch64 = vceqz +generate uint*_t, uint64x*_t + //////////////////// // greater then //////////////////// diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 6e341c00dd..7498ab93ff 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -282,8 +282,8 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str { match v { "FALSE" => false_val(t), "TRUE" => true_val(t), - "MAX" => min_val(t), - "MIN" => max_val(t), + "MAX" => max_val(t), + "MIN" => min_val(t), "FF" => ff_val(t), o => o, } @@ -299,6 +299,8 @@ fn gen_aarch64( in_t: &str, out_t: &str, current_tests: &[(Vec, Vec, Vec)], + has_b: bool, + fixed: &Option, ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = type_to_global_type(out_t); @@ -333,20 +335,40 @@ fn gen_aarch64( } else { String::new() }; + let call = if has_b { + format!( + r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + {}{}(a, b) +}}"#, + name, in_t, in_t, out_t, ext_c, current_fn, + ) + } else if let 
Some(fixed_val) = fixed { + let mut fixed_vals = fixed_val.clone(); + for _i in 1..type_len(in_t) { + fixed_vals.push_str(", "); + fixed_vals.push_str(fixed_val); + } + format!( + r#"pub unsafe fn {}(a: {}) -> {} {{ + {}{}(a, {}({})) +}}"#, + name, in_t, out_t, ext_c, current_fn, in_t, fixed_vals, + ) + } else { + String::new() + }; let function = format!( r#" {} #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr({}))] -pub unsafe fn {}(a: {}, b: {}) -> {} {{ - {}{}(a, b) -}} +{} "#, - current_comment, current_aarch64, name, in_t, in_t, out_t, ext_c, current_fn, + current_comment, current_aarch64, call ); - let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t)); + let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), has_b); (function, test) } @@ -356,6 +378,7 @@ fn gen_test( out_t: &str, current_tests: &[(Vec, Vec, Vec)], len: usize, + has_b: bool, ) -> String { let mut test = format!( r#" @@ -367,20 +390,35 @@ fn gen_test( let a: Vec = a.iter().take(len).cloned().collect(); let b: Vec = b.iter().take(len).cloned().collect(); let e: Vec = e.iter().take(len).cloned().collect(); - let t = format!( - r#" + let t = if has_b { + format!( + r#" let a{}; let b{}; let e{}; let r: {} = transmute({}(transmute(a), transmute(b))); assert_eq!(r, e); "#, - values(in_t, &a), - values(in_t, &b), - values(out_t, &e), - type_to_global_type(out_t), - name - ); + values(in_t, &a), + values(in_t, &b), + values(out_t, &e), + type_to_global_type(out_t), + name + ) + } else { + format!( + r#" + let a{}; + let e{}; + let r: {} = transmute({}(transmute(a))); + assert_eq!(r, e); +"#, + values(in_t, &a), + values(out_t, &e), + type_to_global_type(out_t), + name + ) + }; test.push_str(&t); } test.push_str(" }\n"); @@ -399,6 +437,8 @@ fn gen_arm( in_t: &str, out_t: &str, current_tests: &[(Vec, Vec, Vec)], + has_b: bool, + fixed: &Option, ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = 
type_to_global_type(out_t); @@ -446,7 +486,28 @@ fn gen_arm( } else { String::new() }; - + let call = if has_b { + format!( + r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + {}{}(a, b) +}}"#, + name, in_t, in_t, out_t, ext_c, current_fn, + ) + } else if let Some(fixed_val) = fixed { + let mut fixed_vals = fixed_val.clone(); + for _i in 1..type_len(in_t) { + fixed_vals.push_str(", "); + fixed_vals.push_str(fixed_val); + } + format!( + r#"pub unsafe fn {}(a: {}) -> {} {{ + {}{}(a, {}({})) +}}"#, + name, in_t, out_t, ext_c, current_fn, in_t, fixed_vals, + ) + } else { + String::new() + }; let function = format!( r#" {} @@ -455,21 +516,14 @@ fn gen_arm( #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr({}))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}))] -pub unsafe fn {}(a: {}, b: {}) -> {} {{ - {}{}(a, b) -}} +{} "#, current_comment, expand_intrinsic(¤t_arm, in_t), expand_intrinsic(¤t_aarch64, in_t), - name, - in_t, - in_t, - out_t, - ext_c, - current_fn, + call, ); - let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t)); + let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), has_b); (function, test) } @@ -558,6 +612,7 @@ fn main() -> io::Result<()> { let mut link_aarch64: Option = None; let mut a: Vec = Vec::new(); let mut b: Vec = Vec::new(); + let mut fixed: Option = None; let mut current_tests: Vec<(Vec, Vec, Vec)> = Vec::new(); // @@ -628,6 +683,9 @@ mod test { link_aarch64 = None; link_arm = None; current_tests = Vec::new(); + a = Vec::new(); + b = Vec::new(); + fixed = None; } else if line.starts_with("//") { } else if line.starts_with("name = ") { current_name = Some(String::from(&line[7..])); @@ -641,6 +699,8 @@ mod test { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") { b = line[4..].split(',').map(|v| v.trim().to_string()).collect(); + } else if line.starts_with("fixed = ") { + 
fixed = Some(String::from(&line[8..])); } else if line.starts_with("validate ") { let e = line[9..].split(',').map(|v| v.trim().to_string()).collect(); current_tests.push((a.clone(), b.clone(), e)); @@ -692,6 +752,8 @@ mod test { &in_t, &out_t, ¤t_tests, + b.len() > 0, + &fixed, ); out_arm.push_str(&function); tests_arm.push_str(&test); @@ -705,6 +767,8 @@ mod test { &in_t, &out_t, ¤t_tests, + b.len() > 0, + &fixed, ); out_aarch64.push_str(&function); tests_aarch64.push_str(&test); From 0af94738ef6d67ee6a2e2b8abe10d345d906cb0c Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Thu, 4 Mar 2021 21:43:10 +0800 Subject: [PATCH 047/123] add #[rustfmt::skip] in aarch64/neon/mod.rs --- crates/core_arch/src/aarch64/neon/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 383f8a18a6..94865d5dd1 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -2,7 +2,9 @@ #![allow(non_camel_case_types)] +#[rustfmt::skip] mod generated; +#[rustfmt::skip] pub use self::generated::*; // FIXME: replace neon with asimd From 0f524299d45e177549f4d0ef12ba9b7db11a3fa4 Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Thu, 4 Mar 2021 22:29:04 +0800 Subject: [PATCH 048/123] use transmute in vceqz instructions in aarch64/neon/generated.rs --- .../core_arch/src/aarch64/neon/generated.rs | 36 ++++++------- crates/core_arch/src/macros.rs | 4 +- crates/stdarch-gen/src/main.rs | 54 ++++++++++++++----- 3 files changed, 60 insertions(+), 34 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index a74aa578d8..da9bce257b 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -78,7 +78,7 @@ pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub 
unsafe fn vceqz_s8(a: int8x8_t) -> uint8x8_t { - simd_eq(a, int8x8_t(0, 0, 0, 0, 0, 0, 0, 0)) + simd_eq(a, transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 0))) } /// Signed Compare bitwise equal to zero @@ -86,7 +86,7 @@ pub unsafe fn vceqz_s8(a: int8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_s8(a: int8x16_t) -> uint8x16_t { - simd_eq(a, int8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) + simd_eq(a, transmute(i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) } /// Signed Compare bitwise equal to zero @@ -94,7 +94,7 @@ pub unsafe fn vceqzq_s8(a: int8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_s16(a: int16x4_t) -> uint16x4_t { - simd_eq(a, int16x4_t(0, 0, 0, 0)) + simd_eq(a, transmute(i16x4::new(0, 0, 0, 0))) } /// Signed Compare bitwise equal to zero @@ -102,7 +102,7 @@ pub unsafe fn vceqz_s16(a: int16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_s16(a: int16x8_t) -> uint16x8_t { - simd_eq(a, int16x8_t(0, 0, 0, 0, 0, 0, 0, 0)) + simd_eq(a, transmute(i16x8::new(0, 0, 0, 0, 0, 0, 0, 0))) } /// Signed Compare bitwise equal to zero @@ -110,7 +110,7 @@ pub unsafe fn vceqzq_s16(a: int16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_s32(a: int32x2_t) -> uint32x2_t { - simd_eq(a, int32x2_t(0, 0)) + simd_eq(a, transmute(i32x2::new(0, 0))) } /// Signed Compare bitwise equal to zero @@ -118,7 +118,7 @@ pub unsafe fn vceqz_s32(a: int32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_s32(a: int32x4_t) -> uint32x4_t { - simd_eq(a, int32x4_t(0, 0, 0, 0)) + simd_eq(a, transmute(i32x4::new(0, 0, 0, 0))) } /// Signed Compare bitwise equal to zero @@ -126,7 +126,7 @@ pub unsafe fn vceqzq_s32(a: int32x4_t) -> uint32x4_t { 
#[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_s64(a: int64x1_t) -> uint64x1_t { - simd_eq(a, int64x1_t(0)) + simd_eq(a, transmute(i64x1::new(0))) } /// Signed Compare bitwise equal to zero @@ -134,7 +134,7 @@ pub unsafe fn vceqz_s64(a: int64x1_t) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t { - simd_eq(a, int64x2_t(0, 0)) + simd_eq(a, transmute(i64x2::new(0, 0))) } /// Signed Compare bitwise equal to zero @@ -142,7 +142,7 @@ pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_p64(a: poly64x1_t) -> uint64x1_t { - simd_eq(a, poly64x1_t(0)) + simd_eq(a, transmute(i64x1::new(0))) } /// Signed Compare bitwise equal to zero @@ -150,7 +150,7 @@ pub unsafe fn vceqz_p64(a: poly64x1_t) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t { - simd_eq(a, poly64x2_t(0, 0)) + simd_eq(a, transmute(i64x2::new(0, 0))) } /// Unsigned Compare bitwise equal to zero @@ -158,7 +158,7 @@ pub unsafe fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_u8(a: uint8x8_t) -> uint8x8_t { - simd_eq(a, uint8x8_t(0, 0, 0, 0, 0, 0, 0, 0)) + simd_eq(a, transmute(u8x8::new(0, 0, 0, 0, 0, 0, 0, 0))) } /// Unsigned Compare bitwise equal to zero @@ -166,7 +166,7 @@ pub unsafe fn vceqz_u8(a: uint8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_u8(a: uint8x16_t) -> uint8x16_t { - simd_eq(a, uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) + simd_eq(a, transmute(u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) } /// Unsigned Compare bitwise equal to zero @@ -174,7 +174,7 @@ pub unsafe fn vceqzq_u8(a: 
uint8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_u16(a: uint16x4_t) -> uint16x4_t { - simd_eq(a, uint16x4_t(0, 0, 0, 0)) + simd_eq(a, transmute(u16x4::new(0, 0, 0, 0))) } /// Unsigned Compare bitwise equal to zero @@ -182,7 +182,7 @@ pub unsafe fn vceqz_u16(a: uint16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_u16(a: uint16x8_t) -> uint16x8_t { - simd_eq(a, uint16x8_t(0, 0, 0, 0, 0, 0, 0, 0)) + simd_eq(a, transmute(u16x8::new(0, 0, 0, 0, 0, 0, 0, 0))) } /// Unsigned Compare bitwise equal to zero @@ -190,7 +190,7 @@ pub unsafe fn vceqzq_u16(a: uint16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_u32(a: uint32x2_t) -> uint32x2_t { - simd_eq(a, uint32x2_t(0, 0)) + simd_eq(a, transmute(u32x2::new(0, 0))) } /// Unsigned Compare bitwise equal to zero @@ -198,7 +198,7 @@ pub unsafe fn vceqz_u32(a: uint32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t { - simd_eq(a, uint32x4_t(0, 0, 0, 0)) + simd_eq(a, transmute(u32x4::new(0, 0, 0, 0))) } /// Unsigned Compare bitwise equal to zero @@ -206,7 +206,7 @@ pub unsafe fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqz_u64(a: uint64x1_t) -> uint64x1_t { - simd_eq(a, uint64x1_t(0)) + simd_eq(a, transmute(u64x1::new(0))) } /// Unsigned Compare bitwise equal to zero @@ -214,7 +214,7 @@ pub unsafe fn vceqz_u64(a: uint64x1_t) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(vceqz))] pub unsafe fn vceqzq_u64(a: uint64x2_t) -> uint64x2_t { - simd_eq(a, uint64x2_t(0, 0)) + simd_eq(a, transmute(u64x2::new(0, 0))) } /// Compare signed greater than diff --git a/crates/core_arch/src/macros.rs 
b/crates/core_arch/src/macros.rs index 5d05adfa6a..87e49fba4b 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -426,13 +426,13 @@ macro_rules! constify_imm3 { macro_rules! types { ($( $(#[$doc:meta])* - pub struct $name:ident($field:ty$(, $fields:ty)*$(,)?); + pub struct $name:ident($($fields:tt)*); )*) => ($( $(#[$doc])* #[derive(Copy, Clone, Debug)] #[allow(non_camel_case_types)] #[repr(simd)] #[allow(clippy::missing_inline_in_public_items)] - pub struct $name(pub $field$(, pub $fields)*); + pub struct $name($($fields)*); )*) } diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 7498ab93ff..39836bd916 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -299,7 +299,7 @@ fn gen_aarch64( in_t: &str, out_t: &str, current_tests: &[(Vec, Vec, Vec)], - has_b: bool, + single_para: bool, fixed: &Option, ) -> (String, String) { let _global_t = type_to_global_type(in_t); @@ -335,7 +335,7 @@ fn gen_aarch64( } else { String::new() }; - let call = if has_b { + let call = if !single_para { format!( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{}(a, b) @@ -350,9 +350,15 @@ fn gen_aarch64( } format!( r#"pub unsafe fn {}(a: {}) -> {} {{ - {}{}(a, {}({})) + {}{}(a, transmute({}::new({}))) }}"#, - name, in_t, out_t, ext_c, current_fn, in_t, fixed_vals, + name, + in_t, + out_t, + ext_c, + current_fn, + type_to_global_type(in_t), + fixed_vals, ) } else { String::new() @@ -368,7 +374,14 @@ fn gen_aarch64( current_comment, current_aarch64, call ); - let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), has_b); + let test = gen_test( + name, + &in_t, + &out_t, + current_tests, + type_len(in_t), + single_para, + ); (function, test) } @@ -378,7 +391,7 @@ fn gen_test( out_t: &str, current_tests: &[(Vec, Vec, Vec)], len: usize, - has_b: bool, + single_para: bool, ) -> String { let mut test = format!( r#" @@ -390,7 +403,7 @@ fn gen_test( let a: Vec = 
a.iter().take(len).cloned().collect(); let b: Vec = b.iter().take(len).cloned().collect(); let e: Vec = e.iter().take(len).cloned().collect(); - let t = if has_b { + let t = if !single_para { format!( r#" let a{}; @@ -437,7 +450,7 @@ fn gen_arm( in_t: &str, out_t: &str, current_tests: &[(Vec, Vec, Vec)], - has_b: bool, + single_para: bool, fixed: &Option, ) -> (String, String) { let _global_t = type_to_global_type(in_t); @@ -486,7 +499,7 @@ fn gen_arm( } else { String::new() }; - let call = if has_b { + let call = if !single_para { format!( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{}(a, b) @@ -501,9 +514,15 @@ fn gen_arm( } format!( r#"pub unsafe fn {}(a: {}) -> {} {{ - {}{}(a, {}({})) + {}{}(a, transmute({}::new({}))) }}"#, - name, in_t, out_t, ext_c, current_fn, in_t, fixed_vals, + name, + in_t, + out_t, + ext_c, + current_fn, + type_to_global_type(in_t), + fixed_vals, ) } else { String::new() @@ -523,7 +542,14 @@ fn gen_arm( expand_intrinsic(¤t_aarch64, in_t), call, ); - let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), has_b); + let test = gen_test( + name, + &in_t, + &out_t, + current_tests, + type_len(in_t), + single_para, + ); (function, test) } @@ -752,7 +778,7 @@ mod test { &in_t, &out_t, ¤t_tests, - b.len() > 0, + b.len() == 0, &fixed, ); out_arm.push_str(&function); @@ -767,7 +793,7 @@ mod test { &in_t, &out_t, ¤t_tests, - b.len() > 0, + b.len() == 0, &fixed, ); out_aarch64.push_str(&function); From 4b331e8a4b6e99ed2748a6cd38c5981f4d31e059 Mon Sep 17 00:00:00 2001 From: SparrowLii Date: Thu, 4 Mar 2021 23:54:37 +0800 Subject: [PATCH 049/123] Correct the instruction name and add floating point instructions --- .../core_arch/src/aarch64/neon/generated.rs | 194 +++++++++++++----- crates/stdarch-gen/neon.spec | 22 +- crates/stdarch-gen/src/main.rs | 46 ++--- 3 files changed, 175 insertions(+), 87 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 
da9bce257b..f829cc0dcf 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -73,148 +73,202 @@ pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { simd_eq(a, b) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_s8(a: int8x8_t) -> uint8x8_t { - simd_eq(a, transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 0))) + let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_s8(a: int8x16_t) -> uint8x16_t { - simd_eq(a, transmute(i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) + let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_s16(a: int16x4_t) -> uint16x4_t { - simd_eq(a, transmute(i16x4::new(0, 0, 0, 0))) + let b: i16x4 = i16x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_s16(a: int16x8_t) -> uint16x8_t { - simd_eq(a, transmute(i16x8::new(0, 0, 0, 0, 0, 0, 0, 0))) + let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable 
= "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_s32(a: int32x2_t) -> uint32x2_t { - simd_eq(a, transmute(i32x2::new(0, 0))) + let b: i32x2 = i32x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_s32(a: int32x4_t) -> uint32x4_t { - simd_eq(a, transmute(i32x4::new(0, 0, 0, 0))) + let b: i32x4 = i32x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_s64(a: int64x1_t) -> uint64x1_t { - simd_eq(a, transmute(i64x1::new(0))) + let b: i64x1 = i64x1::new(0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t { - simd_eq(a, transmute(i64x2::new(0, 0))) + let b: i64x2 = i64x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_p64(a: poly64x1_t) -> uint64x1_t { - simd_eq(a, transmute(i64x1::new(0))) + let b: i64x1 = i64x1::new(0); + simd_eq(a, transmute(b)) } -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t { - 
simd_eq(a, transmute(i64x2::new(0, 0))) + let b: i64x2 = i64x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_u8(a: uint8x8_t) -> uint8x8_t { - simd_eq(a, transmute(u8x8::new(0, 0, 0, 0, 0, 0, 0, 0))) + let b: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_u8(a: uint8x16_t) -> uint8x16_t { - simd_eq(a, transmute(u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) + let b: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_u16(a: uint16x4_t) -> uint16x4_t { - simd_eq(a, transmute(u16x4::new(0, 0, 0, 0))) + let b: u16x4 = u16x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_u16(a: uint16x8_t) -> uint16x8_t { - simd_eq(a, transmute(u16x8::new(0, 0, 0, 0, 0, 0, 0, 0))) + let b: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_u32(a: 
uint32x2_t) -> uint32x2_t { - simd_eq(a, transmute(u32x2::new(0, 0))) + let b: u32x2 = u32x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t { - simd_eq(a, transmute(u32x4::new(0, 0, 0, 0))) + let b: u32x4 = u32x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqz_u64(a: uint64x1_t) -> uint64x1_t { - simd_eq(a, transmute(u64x1::new(0))) + let b: u64x1 = u64x1::new(0); + simd_eq(a, transmute(b)) } -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(vceqz))] +#[cfg_attr(test, assert_instr(cmeq))] pub unsafe fn vceqzq_u64(a: uint64x2_t) -> uint64x2_t { - simd_eq(a, transmute(u64x2::new(0, 0))) + let b: u64x2 = u64x2::new(0, 0); + simd_eq(a, transmute(b)) +} + +/// Floating-point compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqz_f32(a: float32x2_t) -> uint32x2_t { + let b: f32x2 = f32x2::new(0.0, 0.0); + simd_eq(a, transmute(b)) +} + +/// Floating-point compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqzq_f32(a: float32x4_t) -> uint32x4_t { + let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); + simd_eq(a, transmute(b)) +} + +/// Floating-point compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqz_f64(a: float64x1_t) -> uint64x1_t { 
+ let b: f64 = 0.0; + simd_eq(a, transmute(b)) +} + +/// Floating-point compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqzq_f64(a: float64x2_t) -> uint64x2_t { + let b: f64x2 = f64x2::new(0.0, 0.0); + simd_eq(a, transmute(b)) } /// Compare signed greater than @@ -752,6 +806,38 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_f32() { + let a: f32x2 = f32x2::new(0.0, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceqz_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_f32() { + let a: f32x4 = f32x4::new(0.0, 1.2, 3.4, 5.6); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0); + let r: u32x4 = transmute(vceqzq_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_f64() { + let a: f64 = 0.0; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceqz_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_f64() { + let a: f64x2 = f64x2::new(0.0, 1.2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vceqzq_f64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vcgt_s64() { let a: i64x1 = i64x1::new(1); diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index 7805df501b..6c2234b127 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -136,26 +136,36 @@ arm = vceq. 
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t -/// Signed Compare bitwise equal to zero +/// Signed compare bitwise equal to zero name = vceqz fn = simd_eq a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX -fixed = 0 +fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE -aarch64 = vceqz +aarch64 = cmeq generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t -/// Unsigned Compare bitwise equal to zero +/// Unsigned compare bitwise equal to zero name = vceqz fn = simd_eq a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX -fixed = 0 +fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE -aarch64 = vceqz +aarch64 = cmeq generate uint*_t, uint64x*_t +/// Floating-point compare bitwise equal to zero +name = vceqz +fn = simd_eq +a = 0.0, 1.2, 3.4, 5.6 +fixed = 0.0, 0.0, 0.0, 0.0 +validate TRUE, FALSE, FALSE, FALSE + +aarch64 = fcmeq +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + //////////////////// // greater then //////////////////// diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 39836bd916..47164a7c8b 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -300,7 +300,7 @@ fn gen_aarch64( out_t: &str, current_tests: &[(Vec, Vec, Vec)], single_para: bool, - fixed: &Option, + fixed: &Vec, ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = 
type_to_global_type(out_t); @@ -342,23 +342,19 @@ fn gen_aarch64( }}"#, name, in_t, in_t, out_t, ext_c, current_fn, ) - } else if let Some(fixed_val) = fixed { - let mut fixed_vals = fixed_val.clone(); - for _i in 1..type_len(in_t) { - fixed_vals.push_str(", "); - fixed_vals.push_str(fixed_val); - } + } else if fixed.len() != 0 { + let fixed: Vec = fixed.iter().take(type_len(in_t)).cloned().collect(); format!( r#"pub unsafe fn {}(a: {}) -> {} {{ - {}{}(a, transmute({}::new({}))) + let b{}; + {}{}(a, transmute(b)) }}"#, name, in_t, out_t, + values(in_t, &fixed), ext_c, current_fn, - type_to_global_type(in_t), - fixed_vals, ) } else { String::new() @@ -451,7 +447,7 @@ fn gen_arm( out_t: &str, current_tests: &[(Vec, Vec, Vec)], single_para: bool, - fixed: &Option, + fixed: &Vec, ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = type_to_global_type(out_t); @@ -506,23 +502,19 @@ fn gen_arm( }}"#, name, in_t, in_t, out_t, ext_c, current_fn, ) - } else if let Some(fixed_val) = fixed { - let mut fixed_vals = fixed_val.clone(); - for _i in 1..type_len(in_t) { - fixed_vals.push_str(", "); - fixed_vals.push_str(fixed_val); - } + } else if fixed.len() != 0 { + let fixed: Vec = fixed.iter().take(type_len(in_t)).cloned().collect(); format!( r#"pub unsafe fn {}(a: {}) -> {} {{ - {}{}(a, transmute({}::new({}))) + let b{}; + {}{}(a, transmute(b)) }}"#, name, in_t, out_t, + values(in_t, &fixed), ext_c, current_fn, - type_to_global_type(in_t), - fixed_vals, ) } else { String::new() @@ -638,7 +630,8 @@ fn main() -> io::Result<()> { let mut link_aarch64: Option = None; let mut a: Vec = Vec::new(); let mut b: Vec = Vec::new(); - let mut fixed: Option = None; + let mut fixed: Vec = Vec::new(); + let mut single_para: bool = true; let mut current_tests: Vec<(Vec, Vec, Vec)> = Vec::new(); // @@ -709,9 +702,7 @@ mod test { link_aarch64 = None; link_arm = None; current_tests = Vec::new(); - a = Vec::new(); - b = Vec::new(); - fixed = None; + single_para = 
true; } else if line.starts_with("//") { } else if line.starts_with("name = ") { current_name = Some(String::from(&line[7..])); @@ -725,8 +716,9 @@ mod test { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") { b = line[4..].split(',').map(|v| v.trim().to_string()).collect(); + single_para = false; } else if line.starts_with("fixed = ") { - fixed = Some(String::from(&line[8..])); + fixed = line[8..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("validate ") { let e = line[9..].split(',').map(|v| v.trim().to_string()).collect(); current_tests.push((a.clone(), b.clone(), e)); @@ -778,7 +770,7 @@ mod test { &in_t, &out_t, ¤t_tests, - b.len() == 0, + single_para, &fixed, ); out_arm.push_str(&function); @@ -793,7 +785,7 @@ mod test { &in_t, &out_t, ¤t_tests, - b.len() == 0, + single_para, &fixed, ); out_aarch64.push_str(&function); From e34af114372319103a1856a23d47ee00fafb48a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= Date: Thu, 4 Mar 2021 00:00:00 +0000 Subject: [PATCH 050/123] Convert some AVX intrinsics to const generics * _mm256_extractf128_ps * _mm256_extractf128_pd * _mm256_extractf128_si256 * _mm256_insertf128_ps * _mm256_insertf128_pd * _mm256_insertf128_si256 --- crates/core_arch/src/x86/avx.rs | 119 +++++++++++++++++--------------- 1 file changed, 62 insertions(+), 57 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 53c4a00f42..8f040fc2f5 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -983,15 +983,17 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextractf128, imm8 = 1) + assert_instr(vextractf128, IMM1 = 1) )] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn 
_mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { - match imm8 & 1 { - 0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]), - _ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]), - } +pub unsafe fn _mm256_extractf128_ps(a: __m256) -> __m128 { + static_assert_imm1!(IMM1); + simd_shuffle4( + a, + _mm256_undefined_ps(), + [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize], + ) } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) @@ -1002,15 +1004,13 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextractf128, imm8 = 1) + assert_instr(vextractf128, IMM1 = 1) )] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { - match imm8 & 1 { - 0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]), - _ => simd_shuffle2(a, _mm256_undefined_pd(), [2, 3]), - } +pub unsafe fn _mm256_extractf128_pd(a: __m256d) -> __m128d { + static_assert_imm1!(IMM1); + simd_shuffle2(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) } /// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. 
@@ -1020,16 +1020,17 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextractf128, imm8 = 1) + assert_instr(vextractf128, IMM1 = 1) )] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i { - let b = _mm256_undefined_si256().as_i64x4(); - let dst: i64x2 = match imm8 & 1 { - 0 => simd_shuffle2(a.as_i64x4(), b, [0, 1]), - _ => simd_shuffle2(a.as_i64x4(), b, [2, 3]), - }; +pub unsafe fn _mm256_extractf128_si256(a: __m256i) -> __m128i { + static_assert_imm1!(IMM1); + let dst: i64x2 = simd_shuffle2( + a.as_i64x4(), + _mm256_undefined_si256().as_i64x4(), + [[0, 1], [2, 3]][IMM1 as usize], + ); transmute(dst) } @@ -1410,16 +1411,17 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vinsertf128, imm8 = 1) + assert_instr(vinsertf128, IMM1 = 1) )] -#[rustc_args_required_const(2)] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { - let b = _mm256_castps128_ps256(b); - match imm8 & 1 { - 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), - } +pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m256 { + static_assert_imm1!(IMM1); + simd_shuffle8( + a, + _mm256_castps128_ps256(b), + [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize], + ) } /// Copies `a` to result, then inserts 128 bits (composed of 2 packed @@ -1431,15 +1433,17 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), - 
assert_instr(vinsertf128, imm8 = 1) + assert_instr(vinsertf128, IMM1 = 1) )] -#[rustc_args_required_const(2)] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d { - match imm8 & 1 { - 0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]), - _ => simd_shuffle4(a, _mm256_castpd128_pd256(b), [0, 1, 4, 5]), - } +pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d) -> __m256d { + static_assert_imm1!(IMM1); + simd_shuffle4( + a, + _mm256_castpd128_pd256(b), + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + ) } /// Copies `a` to result, then inserts 128 bits from `b` into result @@ -1450,16 +1454,17 @@ pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vinsertf128, imm8 = 1) + assert_instr(vinsertf128, IMM1 = 1) )] -#[rustc_args_required_const(2)] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i { - let b = _mm256_castsi128_si256(b).as_i64x4(); - let dst: i64x4 = match imm8 & 1 { - 0 => simd_shuffle4(a.as_i64x4(), b, [4, 5, 2, 3]), - _ => simd_shuffle4(a.as_i64x4(), b, [0, 1, 4, 5]), - }; +pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i) -> __m256i { + static_assert_imm1!(IMM1); + let dst: i64x4 = simd_shuffle4( + a.as_i64x4(), + _mm256_castsi128_si256(b).as_i64x4(), + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + ); transmute(dst) } @@ -2961,7 +2966,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 { let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr)); - _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1) + 
_mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr)) } /// Loads two 128-bit values (composed of 2 packed double-precision (64-bit) @@ -2976,7 +2981,7 @@ pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m2 #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d { let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr)); - _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1) + _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr)) } /// Loads two 128-bit values (composed of integer data) from memory, and combine @@ -2990,7 +2995,7 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i { let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr)); - _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1) + _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr)) } /// Stores the high and low 128-bit halves (each composed of 4 packed @@ -3006,7 +3011,7 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) { let lo = _mm256_castps256_ps128(a); _mm_storeu_ps(loaddr, lo); - let hi = _mm256_extractf128_ps(a, 1); + let hi = _mm256_extractf128_ps::<1>(a); _mm_storeu_ps(hiaddr, hi); } @@ -3023,7 +3028,7 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) { let lo = _mm256_castpd256_pd128(a); _mm_storeu_pd(loaddr, lo); - let hi = _mm256_extractf128_pd(a, 1); + let hi = _mm256_extractf128_pd::<1>(a); _mm_storeu_pd(hiaddr, hi); } @@ -3039,7 +3044,7 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256 pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut 
__m128i, a: __m256i) { let lo = _mm256_castsi256_si128(a); _mm_storeu_si128(loaddr, lo); - let hi = _mm256_extractf128_si256(a, 1); + let hi = _mm256_extractf128_si256::<1>(a); _mm_storeu_si128(hiaddr, hi); } @@ -3727,7 +3732,7 @@ mod tests { #[simd_test(enable = "avx")] unsafe fn test_mm256_extractf128_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); - let r = _mm256_extractf128_ps(a, 0); + let r = _mm256_extractf128_ps::<0>(a); let e = _mm_setr_ps(4., 3., 2., 5.); assert_eq_m128(r, e); } @@ -3735,7 +3740,7 @@ mod tests { #[simd_test(enable = "avx")] unsafe fn test_mm256_extractf128_pd() { let a = _mm256_setr_pd(4., 3., 2., 5.); - let r = _mm256_extractf128_pd(a, 0); + let r = _mm256_extractf128_pd::<0>(a); let e = _mm_setr_pd(4., 3.); assert_eq_m128d(r, e); } @@ -3743,7 +3748,7 @@ mod tests { #[simd_test(enable = "avx")] unsafe fn test_mm256_extractf128_si256() { let a = _mm256_setr_epi64x(4, 3, 2, 5); - let r = _mm256_extractf128_si256(a, 0); + let r = _mm256_extractf128_si256::<0>(a); let e = _mm_setr_epi64x(4, 3); assert_eq_m128i(r, e); } @@ -3894,7 +3899,7 @@ mod tests { unsafe fn test_mm256_insertf128_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let b = _mm_setr_ps(4., 9., 16., 25.); - let r = _mm256_insertf128_ps(a, b, 0); + let r = _mm256_insertf128_ps::<0>(a, b); let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.); assert_eq_m256(r, e); } @@ -3903,7 +3908,7 @@ mod tests { unsafe fn test_mm256_insertf128_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm_setr_pd(5., 6.); - let r = _mm256_insertf128_pd(a, b, 0); + let r = _mm256_insertf128_pd::<0>(a, b); let e = _mm256_setr_pd(5., 6., 3., 4.); assert_eq_m256d(r, e); } @@ -3912,7 +3917,7 @@ mod tests { unsafe fn test_mm256_insertf128_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let b = _mm_setr_epi64x(5, 6); - let r = _mm256_insertf128_si256(a, b, 0); + let r = _mm256_insertf128_si256::<0>(a, b); let e = _mm256_setr_epi64x(5, 6, 3, 4); 
assert_eq_m256i(r, e); } From 7415f3d6ec267204829732719dbbe9aeb0668d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:14:27 +0100 Subject: [PATCH 051/123] convert `_mm_aeskeygenassist_si128` to const generics --- crates/core_arch/src/x86/aes.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/aes.rs b/crates/core_arch/src/x86/aes.rs index 603744aef6..ffded1a0dc 100644 --- a/crates/core_arch/src/x86/aes.rs +++ b/crates/core_arch/src/x86/aes.rs @@ -87,21 +87,17 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i { /// /// Assist in expanding the AES cipher key by computing steps towards /// generating a round key for encryption cipher using data from `a` and an -/// 8-bit round constant `imm8`. +/// 8-bit round constant `IMM8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aeskeygenassist_si128) #[inline] #[target_feature(enable = "aes")] -#[cfg_attr(test, assert_instr(aeskeygenassist, imm8 = 0))] -#[rustc_args_required_const(1)] +#[cfg_attr(test, assert_instr(aeskeygenassist, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i, imm8: i32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - aeskeygenassist(a, $imm8) - }; - } - constify_imm8!(imm8, call) +pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + aeskeygenassist(a, IMM8 as u8) } #[cfg(test)] @@ -169,7 +165,7 @@ mod tests { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714138.aspx. 
let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); let e = _mm_set_epi64x(0x857c266b7c266e85, 0xeac4eea9c4eeacea); - let r = _mm_aeskeygenassist_si128(a, 5); + let r = _mm_aeskeygenassist_si128::<5>(a); assert_eq_m128i(r, e); } } From d866ab5e1414fc56c6724adda6d2acb6a40ac422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:15:31 +0100 Subject: [PATCH 052/123] convert `_mm_extract_epi64` to const generics --- crates/core_arch/src/x86_64/sse41.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86_64/sse41.rs b/crates/core_arch/src/x86_64/sse41.rs index 1b37967325..d0fd68d5f5 100644 --- a/crates/core_arch/src/x86_64/sse41.rs +++ b/crates/core_arch/src/x86_64/sse41.rs @@ -8,20 +8,17 @@ use crate::{ #[cfg(test)] use stdarch_test::assert_instr; -/// Extracts an 64-bit integer from `a` selected with `imm8` +/// Extracts an 64-bit integer from `a` selected with `IMM1` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64) #[inline] #[target_feature(enable = "sse4.1")] -#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(pextrq, imm8 = 1))] -#[rustc_args_required_const(1)] +#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(pextrq, IMM1 = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 { - let a = a.as_i64x2(); - match imm8 & 1 { - 0 => simd_extract(a, 0), - _ => simd_extract(a, 1), - } +pub unsafe fn _mm_extract_epi64(a: __m128i) -> i64 { + static_assert_imm1!(IMM1); + simd_extract(a.as_i64x2(), IMM1 as u32) } /// Returns a copy of `a` with the 64-bit integer from `i` inserted at a @@ -49,10 +46,10 @@ mod tests { #[simd_test(enable = "sse4.1")] unsafe fn test_mm_extract_epi64() { let a = _mm_setr_epi64x(0, 1); - let r = _mm_extract_epi64(a, 1); - assert_eq!(r, 1); - let r = 
_mm_extract_epi64(a, 3); + let r = _mm_extract_epi64::<1>(a); assert_eq!(r, 1); + let r = _mm_extract_epi64::<0>(a); + assert_eq!(r, 0); } #[simd_test(enable = "sse4.1")] From 28063ea5fc04c193361c07bc28c8d4a98b6dc965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:15:59 +0100 Subject: [PATCH 053/123] convert `_mm_insert_epi64` to const generics --- crates/core_arch/src/x86/avx2.rs | 2 +- crates/core_arch/src/x86/test.rs | 6 ++++-- crates/core_arch/src/x86_64/sse41.rs | 20 +++++++++----------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 785b0fe9bb..81d7adc0bb 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -5135,7 +5135,7 @@ mod tests { #[simd_test(enable = "avx2")] unsafe fn test_mm256_sll_epi64() { let a = _mm256_set1_epi64x(0xFFFFFFFF); - let b = _mm_insert_epi64(_mm_set1_epi64x(0), 4, 0); + let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4); let r = _mm256_sll_epi64(a, b); assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0)); } diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs index 9f577972fa..9f8b969301 100644 --- a/crates/core_arch/src/x86/test.rs +++ b/crates/core_arch/src/x86/test.rs @@ -92,14 +92,16 @@ pub unsafe fn get_m512i(a: __m512i, idx: usize) -> i64 { mod x86_polyfill { use crate::core_arch::x86::*; - pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i { + #[rustc_legacy_const_generics(2)] + pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64) -> __m128i { + static_assert_imm1!(INDEX); #[repr(C)] union A { a: __m128i, b: [i64; 2], } let mut a = A { a }; - a.b[idx as usize] = val; + a.b[INDEX as usize] = val; a.a } diff --git a/crates/core_arch/src/x86_64/sse41.rs b/crates/core_arch/src/x86_64/sse41.rs index d0fd68d5f5..3d1ea0cf65 100644 --- a/crates/core_arch/src/x86_64/sse41.rs +++ b/crates/core_arch/src/x86_64/sse41.rs @@ 
-22,20 +22,17 @@ pub unsafe fn _mm_extract_epi64(a: __m128i) -> i64 { } /// Returns a copy of `a` with the 64-bit integer from `i` inserted at a -/// location specified by `imm8`. +/// location specified by `IMM1`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64) #[inline] #[target_feature(enable = "sse4.1")] -#[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))] -#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i { - let a = a.as_i64x2(); - match imm8 & 1 { - 0 => transmute(simd_insert(a, 0, i)), - _ => transmute(simd_insert(a, 1, i)), - } +pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64) -> __m128i { + static_assert_imm1!(IMM1); + transmute(simd_insert(a.as_i64x2(), IMM1 as u32, i)) } #[cfg(test)] @@ -56,9 +53,10 @@ mod tests { unsafe fn test_mm_insert_epi64() { let a = _mm_set1_epi64x(0); let e = _mm_setr_epi64x(0, 32); - let r = _mm_insert_epi64(a, 32, 1); + let r = _mm_insert_epi64::<1>(a, 32); assert_eq_m128i(r, e); - let r = _mm_insert_epi64(a, 32, 3); + let e = _mm_setr_epi64x(32, 0); + let r = _mm_insert_epi64::<0>(a, 32); assert_eq_m128i(r, e); } } From dd7f711a87b9d298777b1c7634dd3dd44ca297a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:25:17 +0100 Subject: [PATCH 054/123] convert `_mm256_extract_epi64` to const generics --- crates/core_arch/src/x86_64/avx2.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86_64/avx2.rs b/crates/core_arch/src/x86_64/avx2.rs index 0f81cd221f..14447a1371 100644 --- a/crates/core_arch/src/x86_64/avx2.rs +++ b/crates/core_arch/src/x86_64/avx2.rs @@ -20,22 +20,17 @@ use crate::core_arch::{simd_llvm::*, x86::*}; -/// Extracts a 64-bit integer from `a`, selected with `imm8`. 
+/// Extracts a 64-bit integer from `a`, selected with `INDEX`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64) #[inline] #[target_feature(enable = "avx2")] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 { - let a = a.as_i64x4(); - match imm8 & 3 { - 0 => simd_extract(a, 0), - 1 => simd_extract(a, 1), - 2 => simd_extract(a, 2), - _ => simd_extract(a, 3), - } +pub unsafe fn _mm256_extract_epi64(a: __m256i) -> i64 { + static_assert_imm2!(INDEX); + simd_extract(a.as_i64x4(), INDEX as u32) } #[cfg(test)] @@ -46,7 +41,7 @@ mod tests { #[simd_test(enable = "avx2")] unsafe fn test_mm256_extract_epi64() { let a = _mm256_setr_epi64x(0, 1, 2, 3); - let r = _mm256_extract_epi64(a, 3); + let r = _mm256_extract_epi64::<3>(a); assert_eq!(r, 3); } } From 0bc1b079539ec37824c77ccb673774c72a49834d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:31:46 +0100 Subject: [PATCH 055/123] convert `_mm_sha1rnds4_epu32` to const generics --- crates/core_arch/src/x86/sha.rs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/crates/core_arch/src/x86/sha.rs b/crates/core_arch/src/x86/sha.rs index 362a97ccd3..cfb330cfbb 100644 --- a/crates/core_arch/src/x86/sha.rs +++ b/crates/core_arch/src/x86/sha.rs @@ -66,25 +66,18 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { /// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) /// from `a` and some pre-computed sum of the next 4 round message values /// (unsigned 32-bit integers), and state variable E from `b`, and return the -/// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round +/// updated SHA1 state (A,B,C,D). 
`FUNC` contains the logic functions and round /// constants. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1rnds4_epu32) #[inline] #[target_feature(enable = "sha")] -#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))] -#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i { - let a = a.as_i32x4(); - let b = b.as_i32x4(); - macro_rules! call { - ($imm2:expr) => { - sha1rnds4(a, b, $imm2) - }; - } - let ret = constify_imm2!(func, call); - transmute(ret) +pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm2!(FUNC); + transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) } /// Performs an intermediate calculation for the next four SHA256 message values @@ -179,19 +172,19 @@ mod tests { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f); - let r = _mm_sha1rnds4_epu32(a, b, 0); + let r = _mm_sha1rnds4_epu32::<0>(a, b); assert_eq_m128i(r, expected); let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe); - let r = _mm_sha1rnds4_epu32(a, b, 1); + let r = _mm_sha1rnds4_epu32::<1>(a, b); assert_eq_m128i(r, expected); let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001); - let r = _mm_sha1rnds4_epu32(a, b, 2); + let r = _mm_sha1rnds4_epu32::<2>(a, b); assert_eq_m128i(r, expected); let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b); - let r = _mm_sha1rnds4_epu32(a, b, 3); + let r = _mm_sha1rnds4_epu32::<3>(a, b); assert_eq_m128i(r, expected); } From 1787257e8db11ab8d3000158c36027e51bce4290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:32:36 
+0100 Subject: [PATCH 056/123] remove unused constify_imm x86 macro --- crates/core_arch/src/x86/macros.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index 47ceaeb20a..8cacbf44c5 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -48,18 +48,6 @@ macro_rules! constify_imm3 { }; } -macro_rules! constify_imm2 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b11 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - _ => $expand!(3), - } - }; -} - // Constifies 5 bits along with an sae option without rounding control. // See: https://github.com/llvm/llvm-project/blob/bd50cf905fa7c0c7caa134301c6ca0658c81eeb1/clang/lib/Sema/SemaChecking.cpp#L3497 #[allow(unused)] From b4a176d1da8d12ce5fa7e86ad4033c92f4a5fdff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:58:13 +0100 Subject: [PATCH 057/123] convert `_mm_clmulepi64_si128` to const generics --- crates/core_arch/src/x86/avx512vpclmulqdq.rs | 16 +++++----- crates/core_arch/src/x86/pclmulqdq.rs | 32 +++++++++----------- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/crates/core_arch/src/x86/avx512vpclmulqdq.rs b/crates/core_arch/src/x86/avx512vpclmulqdq.rs index 831ab7f642..b4e472dc3f 100644 --- a/crates/core_arch/src/x86/avx512vpclmulqdq.rs +++ b/crates/core_arch/src/x86/avx512vpclmulqdq.rs @@ -221,19 +221,19 @@ mod tests { ); verify_512_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x00), + |a, b| _mm_clmulepi64_si128::<0x00>(a, b), |a, b| _mm512_clmulepi64_epi128(a, b, 0x00), ); verify_512_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x01), + |a, b| _mm_clmulepi64_si128::<0x01>(a, b), |a, b| _mm512_clmulepi64_epi128(a, b, 0x01), ); verify_512_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x10), + |a, b| _mm_clmulepi64_si128::<0x10>(a, b), |a, b| _mm512_clmulepi64_epi128(a, b, 0x10), ); 
verify_512_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x11), + |a, b| _mm_clmulepi64_si128::<0x11>(a, b), |a, b| _mm512_clmulepi64_epi128(a, b, 0x11), ); } @@ -247,19 +247,19 @@ mod tests { ); verify_256_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x00), + |a, b| _mm_clmulepi64_si128::<0x00>(a, b), |a, b| _mm256_clmulepi64_epi128(a, b, 0x00), ); verify_256_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x01), + |a, b| _mm_clmulepi64_si128::<0x01>(a, b), |a, b| _mm256_clmulepi64_epi128(a, b, 0x01), ); verify_256_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x10), + |a, b| _mm_clmulepi64_si128::<0x10>(a, b), |a, b| _mm256_clmulepi64_epi128(a, b, 0x10), ); verify_256_helper( - |a, b| _mm_clmulepi64_si128(a, b, 0x11), + |a, b| _mm_clmulepi64_si128::<0x11>(a, b), |a, b| _mm256_clmulepi64_epi128(a, b, 0x11), ); } diff --git a/crates/core_arch/src/x86/pclmulqdq.rs b/crates/core_arch/src/x86/pclmulqdq.rs index 0e1bebae9e..6ccf3a62a6 100644 --- a/crates/core_arch/src/x86/pclmulqdq.rs +++ b/crates/core_arch/src/x86/pclmulqdq.rs @@ -25,20 +25,16 @@ extern "C" { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128) #[inline] #[target_feature(enable = "pclmulqdq")] -#[cfg_attr(all(test, not(target_os = "linux")), assert_instr(pclmulqdq, imm8 = 0))] -#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmullqlqdq, imm8 = 0))] -#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqlqdq, imm8 = 1))] -#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmullqhqdq, imm8 = 16))] -#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqhqdq, imm8 = 17))] -#[rustc_args_required_const(2)] +#[cfg_attr(all(test, not(target_os = "linux")), assert_instr(pclmulqdq, IMM8 = 0))] +#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmullqlqdq, IMM8 = 0))] +#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqlqdq, IMM8 = 1))] +#[cfg_attr(all(test, target_os = "linux"), 
assert_instr(pclmullqhqdq, IMM8 = 16))] +#[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqhqdq, IMM8 = 17))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - pclmulqdq(a, b, $imm8) - }; - } - constify_imm8!(imm8, call) +pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + pclmulqdq(a, b, IMM8 as u8) } #[cfg(test)] @@ -62,13 +58,13 @@ mod tests { let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9); let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); - assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x00), r00); - assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x10), r01); - assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x01), r10); - assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x11), r11); + assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(a, b), r00); + assert_eq_m128i(_mm_clmulepi64_si128::<0x10>(a, b), r01); + assert_eq_m128i(_mm_clmulepi64_si128::<0x01>(a, b), r10); + assert_eq_m128i(_mm_clmulepi64_si128::<0x11>(a, b), r11); let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000); - assert_eq_m128i(_mm_clmulepi64_si128(a0, a0, 0x00), r); + assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(a0, a0), r); } } From 23ea66383579dc8e5fe7f3307dd16c89c02b66c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:10:08 +0100 Subject: [PATCH 058/123] convert `_mm512_cmp_epu16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index b6fa9d254a..3a7f397292 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3689,22 +3689,18 @@ 
pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) - _mm_cmpneq_epi8_mask(a, b) & k1 } -/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. +/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu16_mask&expand=715) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask32 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_u16x32(); let b = b.as_u16x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpuw(a, b, $imm3, 0b11111111_11111111_11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111); transmute(r) } @@ -13456,7 +13452,7 @@ mod tests { unsafe fn test_mm512_cmp_epu16_mask() { let a = _mm512_set1_epi16(0); let b = _mm512_set1_epi16(1); - let m = _mm512_cmp_epu16_mask(a, b, _MM_CMPINT_LT); + let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111_11111111_11111111); } From d56aa643b9d68f8bf4947190604f7384f344616a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:23:55 +0100 Subject: [PATCH 059/123] convert `_mm512_mask_cmp_epu16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 3a7f397292..1feebf50bb 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3709,22 +3709,17 @@ pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu16_mask&expand=716) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_mask_cmp_epu16_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_mask_cmp_epu16_mask( k1: __mmask32, a: __m512i, b: __m512i, - imm8: i32, ) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_u16x32(); let b = b.as_u16x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpuw(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpuw(a, b, IMM8, k1); transmute(r) } @@ -13461,7 +13456,7 @@ mod tests { let a = _mm512_set1_epi16(0); let b = _mm512_set1_epi16(1); let mask = 0b01010101_01010101_01010101_01010101; - let r = _mm512_mask_cmp_epu16_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101_01010101_01010101); } From 80a9a08a5a34ec11696cf10a6a0368ea43fb4f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:27:28 +0100 Subject: [PATCH 060/123] convert `_mm256_cmp_epu16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 1feebf50bb..9bfc4e9d44 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3728,17 +3728,13 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu16_mask&expand=713) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i, imm8: i32) -> __mmask16 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_u16x16(); let b = b.as_u16x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpuw256(a, b, $imm3, 0b11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpuw256(a, b, IMM8, 0b11111111_11111111); transmute(r) } @@ -13464,7 +13460,7 @@ mod tests { unsafe fn test_mm256_cmp_epu16_mask() { let a = _mm256_set1_epi16(0); let b = _mm256_set1_epi16(1); - let m = _mm256_cmp_epu16_mask(a, b, _MM_CMPINT_LT); + let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111); } From 5fc82f5ead8ee4e1ff5bb2d5c138fddb49f0611f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:29:17 +0100 Subject: [PATCH 061/123] convert `_mm256_mask_cmp_epu16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 9bfc4e9d44..4df9389029 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3743,22 +3743,17 @@ pub unsafe fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu16_mask&expand=714) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_mask_cmp_epu16_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_mask_cmp_epu16_mask( k1: __mmask16, a: __m256i, b: __m256i, - imm8: i32, ) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_u16x16(); let b = b.as_u16x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpuw256(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpuw256(a, b, IMM8, k1); transmute(r) } @@ -13469,7 +13464,7 @@ mod tests { let a = _mm256_set1_epi16(0); let b = _mm256_set1_epi16(1); let mask = 0b01010101_01010101; - let r = _mm256_mask_cmp_epu16_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101); } From 9bd881c00ba0eb695ddcf7973b04a8e6637333ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:30:13 +0100 Subject: [PATCH 062/123] convert `_mm_cmp_epu16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 4df9389029..74e0ef6c0d 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3762,17 +3762,13 @@ pub unsafe fn _mm256_mask_cmp_epu16_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu16_mask&expand=711) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i, imm8: i32) -> __mmask8 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + static_assert_imm3!(IMM8); let a = a.as_u16x8(); let b = b.as_u16x8(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpuw128(a, b, $imm3, 0b11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpuw128(a, b, IMM8, 0b11111111); transmute(r) } @@ -13472,7 +13468,7 @@ mod tests { unsafe fn test_mm_cmp_epu16_mask() { let a = _mm_set1_epi16(0); let b = _mm_set1_epi16(1); - let m = _mm_cmp_epu16_mask(a, b, _MM_CMPINT_LT); + let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111); } From 27e36f3e179970ced38743a32e9e9758303dfd28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:32:50 +0100 Subject: [PATCH 063/123] convert `_mm_mask_cmp_epu16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 74e0ef6c0d..f7b916d7c0 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3777,17 +3777,17 @@ pub unsafe fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu16_mask&expand=712) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_mask_cmp_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __mmask8 { +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_mask_cmp_epu16_mask( + k1: __mmask8, + a: __m128i, + b: __m128i, +) -> __mmask8 { + static_assert_imm3!(IMM8); let a = a.as_u16x8(); let b = b.as_u16x8(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpuw128(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpuw128(a, b, IMM8, k1); transmute(r) } @@ -13477,7 +13477,7 @@ mod tests { let a = _mm_set1_epi16(0); let b = _mm_set1_epi16(1); let mask = 0b01010101; - let r = _mm_mask_cmp_epu16_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101); } From 72f5b431839cde31c11e06a9d59a727717957c51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:33:56 +0100 Subject: [PATCH 064/123] convert `_mm512_cmp_epu8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index f7b916d7c0..4ad3c0901f 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3796,22 +3796,18 @@ pub unsafe fn _mm_mask_cmp_epu16_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu8_mask&expand=733) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask64 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + static_assert_imm3!(IMM8); let a = a.as_u8x64(); let b = b.as_u8x64(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpub( - a, - b, - $imm3, - 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, - ) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpub( + a, + b, + IMM8, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + ); transmute(r) } @@ -13485,7 +13481,7 @@ mod tests { unsafe fn test_mm512_cmp_epu8_mask() { let a = _mm512_set1_epi8(0); let b = _mm512_set1_epi8(1); - let m = _mm512_cmp_epu8_mask(a, b, _MM_CMPINT_LT); + let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); assert_eq!( m, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 From bf834810fed16b6e82a500fb7bf028c1031b9140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:35:11 +0100 Subject: [PATCH 065/123] convert `_mm512_mask_cmp_epu8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 4ad3c0901f..51f83f4fa5 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3816,22 +3816,17 @@ pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu8_mask&expand=734) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_mask_cmp_epu8_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_mask_cmp_epu8_mask( k1: __mmask64, a: __m512i, b: __m512i, - imm8: i32, ) -> __mmask64 { + static_assert_imm3!(IMM8); let a = a.as_u8x64(); let b = b.as_u8x64(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpub(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpub(a, b, IMM8, k1); transmute(r) } @@ -13493,7 +13488,7 @@ mod tests { let a = _mm512_set1_epi8(0); let b = _mm512_set1_epi8(1); let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; - let r = _mm512_mask_cmp_epu8_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!( r, 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 From c5a2a4c4fe5f182c8d8aa032281ab03277b55fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:35:57 +0100 Subject: [PATCH 066/123] convert `_mm256_cmp_epu8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 51f83f4fa5..e74b987005 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3835,17 +3835,13 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu8_mask&expand=731) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i, imm8: i32) -> __mmask32 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_u8x32(); let b = b.as_u8x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpub256(a, b, $imm3, 0b11111111_11111111_11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111); transmute(r) } @@ -13499,7 +13495,7 @@ mod tests { unsafe fn test_mm256_cmp_epu8_mask() { let a = _mm256_set1_epi8(0); let b = _mm256_set1_epi8(1); - let m = _mm256_cmp_epu8_mask(a, b, _MM_CMPINT_LT); + let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111_11111111_11111111); } From ff17cf8debbb47a3a977c6b62e52e140f02f14f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:36:44 +0100 Subject: [PATCH 067/123] convert `_mm256_mask_cmp_epu8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index e74b987005..3905a35227 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3850,22 +3850,17 @@ pub unsafe fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu8_mask&expand=732) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_mask_cmp_epu8_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_mask_cmp_epu8_mask( k1: __mmask32, a: __m256i, b: __m256i, - imm8: i32, ) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_u8x32(); let b = b.as_u8x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpub256(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpub256(a, b, IMM8, k1); transmute(r) } @@ -13504,7 +13499,7 @@ mod tests { let a = _mm256_set1_epi8(0); let b = _mm256_set1_epi8(1); let mask = 0b01010101_01010101_01010101_01010101; - let r = _mm256_mask_cmp_epu8_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101_01010101_01010101); } From 990616d867961634f3e344770a140e4dc6fc8412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:37:38 +0100 Subject: [PATCH 068/123] convert `_mm_cmp_epu8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 3905a35227..4f1c271401 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3869,17 +3869,13 @@ pub unsafe fn _mm256_mask_cmp_epu8_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu8_mask&expand=729) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i, imm8: i32) -> __mmask16 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_u8x16(); let b = b.as_u8x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpub128(a, b, $imm3, 0b11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpub128(a, b, IMM8, 0b11111111_11111111); transmute(r) } @@ -13507,7 +13503,7 @@ mod tests { unsafe fn test_mm_cmp_epu8_mask() { let a = _mm_set1_epi8(0); let b = _mm_set1_epi8(1); - let m = _mm_cmp_epu8_mask(a, b, _MM_CMPINT_LT); + let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111); } From ede02dd26dbd505a9ee6366b01d680df06688e1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:38:29 +0100 Subject: [PATCH 069/123] convert `_mm_mask_cmp_epu8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 4f1c271401..e011cb4749 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3884,22 +3884,17 @@ pub unsafe fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu8_mask&expand=730) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_mask_cmp_epu8_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_mask_cmp_epu8_mask( k1: __mmask16, a: __m128i, b: __m128i, - imm8: i32, ) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_u8x16(); let b = b.as_u8x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpub128(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpub128(a, b, IMM8, k1); transmute(r) } @@ -13512,7 +13507,7 @@ mod tests { let a = _mm_set1_epi8(0); let b = _mm_set1_epi8(1); let mask = 0b01010101_01010101; - let r = _mm_mask_cmp_epu8_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101); } From e2b3855675741a2412eb01bf3cd67626e793dce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:39:20 +0100 Subject: [PATCH 070/123] convert `_mm512_cmp_epi16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index e011cb4749..37b882353b 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3903,17 +3903,13 @@ pub unsafe fn _mm_mask_cmp_epu8_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi16_mask&expand=691) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask32 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_i16x32(); let b = b.as_i16x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpw(a, b, $imm3, 0b11111111_11111111_11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111); transmute(r) } @@ -13515,7 +13511,7 @@ mod tests { unsafe fn test_mm512_cmp_epi16_mask() { let a = _mm512_set1_epi16(0); let b = _mm512_set1_epi16(1); - let m = _mm512_cmp_epi16_mask(a, b, _MM_CMPINT_LT); + let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111_11111111_11111111); } From 558016c38d940d11ea796052333c7a4eecd331d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:40:06 +0100 Subject: [PATCH 071/123] convert `_mm512_mask_cmp_epi16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 37b882353b..933e829990 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3918,22 +3918,17 @@ pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi16_mask&expand=692) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_mask_cmp_epi16_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_mask_cmp_epi16_mask( k1: __mmask32, a: __m512i, b: __m512i, - imm8: i32, ) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_i16x32(); let b = b.as_i16x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpw(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpw(a, b, IMM8, k1); transmute(r) } @@ -13520,7 +13515,7 @@ mod tests { let a = _mm512_set1_epi16(0); let b = _mm512_set1_epi16(1); let mask = 0b01010101_01010101_01010101_01010101; - let r = _mm512_mask_cmp_epi16_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101_01010101_01010101); } From 5b10e43ca4518e9460ea44fd139da45258b96f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:41:44 +0100 Subject: [PATCH 072/123] convert `_mm256_cmp_epi16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 933e829990..a249057a0d 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3937,17 +3937,13 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi16_mask&expand=689) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i, imm8: i32) -> __mmask16 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_i16x16(); let b = b.as_i16x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpw256(a, b, $imm3, 0b11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpw256(a, b, IMM8, 0b11111111_11111111); transmute(r) } @@ -13523,7 +13519,7 @@ mod tests { unsafe fn test_mm256_cmp_epi16_mask() { let a = _mm256_set1_epi16(0); let b = _mm256_set1_epi16(1); - let m = _mm256_cmp_epi16_mask(a, b, _MM_CMPINT_LT); + let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111); } From 2551d9bd2f7bd27e49531ead7eae854e8c80576c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:42:46 +0100 Subject: [PATCH 073/123] convert `_mm256_mask_cmp_epi16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index a249057a0d..8bf8dfd1da 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3952,22 +3952,17 @@ pub unsafe fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi16_mask&expand=690) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_mask_cmp_epi16_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_mask_cmp_epi16_mask( k1: __mmask16, a: __m256i, b: __m256i, - imm8: i32, ) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_i16x16(); let b = b.as_i16x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpw256(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpw256(a, b, IMM8, k1); transmute(r) } @@ -13528,7 +13523,7 @@ mod tests { let a = _mm256_set1_epi16(0); let b = _mm256_set1_epi16(1); let mask = 0b01010101_01010101; - let r = _mm256_mask_cmp_epi16_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101); } From e9ebb9433edc335d3adf8f32a826ac099fa1601f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:43:33 +0100 Subject: [PATCH 074/123] convert `_mm_cmp_epi16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 8bf8dfd1da..c905cdf4a6 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3971,17 +3971,13 @@ pub unsafe fn _mm256_mask_cmp_epi16_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi16_mask&expand=687) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i, imm8: i32) -> __mmask8 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + static_assert_imm3!(IMM8); let a = a.as_i16x8(); let b = b.as_i16x8(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpw128(a, b, $imm3, 0b11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpw128(a, b, IMM8, 0b11111111); transmute(r) } @@ -13531,7 +13527,7 @@ mod tests { unsafe fn test_mm_cmp_epi16_mask() { let a = _mm_set1_epi16(0); let b = _mm_set1_epi16(1); - let m = _mm_cmp_epi16_mask(a, b, _MM_CMPINT_LT); + let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111); } From e84657a465f25cbfac68ea70036a3cb78cf5a725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:44:18 +0100 Subject: [PATCH 075/123] convert `_mm_mask_cmp_epi16_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index c905cdf4a6..e2cfe4b0ac 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -3986,17 +3986,17 @@ pub unsafe fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi16_mask&expand=688) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_mask_cmp_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __mmask8 { +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_mask_cmp_epi16_mask( + k1: __mmask8, + a: __m128i, + b: __m128i, +) -> __mmask8 { + static_assert_imm3!(IMM8); let a = a.as_i16x8(); let b = b.as_i16x8(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpw128(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpw128(a, b, IMM8, k1); transmute(r) } @@ -13536,7 +13536,7 @@ mod tests { let a = _mm_set1_epi16(0); let b = _mm_set1_epi16(1); let mask = 0b01010101; - let r = _mm_mask_cmp_epi16_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101); } From dbbcb17293df2b624ef92721a382d0b9a8e0f7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:44:58 +0100 Subject: [PATCH 076/123] convert `_mm512_cmp_epi8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index e2cfe4b0ac..59e6372711 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -4005,22 +4005,18 @@ pub unsafe fn _mm_mask_cmp_epi16_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi8_mask&expand=709) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask64 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + static_assert_imm3!(IMM8); let a = a.as_i8x64(); let b = b.as_i8x64(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpb( - a, - b, - $imm3, - 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, - ) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpb( + a, + b, + IMM8, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, + ); transmute(r) } @@ -13544,7 +13540,7 @@ mod tests { unsafe fn test_mm512_cmp_epi8_mask() { let a = _mm512_set1_epi8(0); let b = _mm512_set1_epi8(1); - let m = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_LT); + let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); assert_eq!( m, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 From 2693a72ff043af351888296f3b9a37d3ee003c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:46:06 +0100 Subject: [PATCH 077/123] convert `_mm512_mask_cmp_epi8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 59e6372711..b0da4936d7 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -4025,22 +4025,17 @@ pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi8_mask&expand=710) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm512_mask_cmp_epi8_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm512_mask_cmp_epi8_mask( k1: __mmask64, a: __m512i, b: __m512i, - imm8: i32, ) -> __mmask64 { + static_assert_imm3!(IMM8); let a = a.as_i8x64(); let b = b.as_i8x64(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpb(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpb(a, b, IMM8, k1); transmute(r) } @@ -13552,7 +13547,7 @@ mod tests { let a = _mm512_set1_epi8(0); let b = _mm512_set1_epi8(1); let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; - let r = _mm512_mask_cmp_epi8_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!( r, 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 From 28bbe5c1ad64402b4939b2da719c7192a4e19db6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:46:52 +0100 Subject: [PATCH 078/123] convert `_mm256_cmp_epi8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index b0da4936d7..cdf049b1ce 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -4044,17 +4044,13 @@ pub unsafe fn _mm512_mask_cmp_epi8_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi8_mask&expand=707) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i, imm8: i32) -> __mmask32 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_i8x32(); let b = b.as_i8x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpb256(a, b, $imm3, 0b11111111_11111111_11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111); transmute(r) } @@ -13558,7 +13554,7 @@ mod tests { unsafe fn test_mm256_cmp_epi8_mask() { let a = _mm256_set1_epi8(0); let b = _mm256_set1_epi8(1); - let m = _mm256_cmp_epi8_mask(a, b, _MM_CMPINT_LT); + let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111_11111111_11111111); } From e6e04fdee5ef99601d207790ab645dd71fedd095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:47:40 +0100 Subject: [PATCH 079/123] convert `_mm256_mask_cmp_epi8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index cdf049b1ce..cb1cd29c92 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -4059,22 +4059,17 @@ pub unsafe fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi8_mask&expand=708) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm256_mask_cmp_epi8_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm256_mask_cmp_epi8_mask( k1: __mmask32, a: __m256i, b: __m256i, - imm8: i32, ) -> __mmask32 { + static_assert_imm3!(IMM8); let a = a.as_i8x32(); let b = b.as_i8x32(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpb256(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpb256(a, b, IMM8, k1); transmute(r) } @@ -13563,7 +13558,7 @@ mod tests { let a = _mm256_set1_epi8(0); let b = _mm256_set1_epi8(1); let mask = 0b01010101_01010101_01010101_01010101; - let r = _mm256_mask_cmp_epi8_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101_01010101_01010101); } From 930d81255cacfb30d278fdaf9b5c4f1a38345a40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:48:46 +0100 Subject: [PATCH 080/123] convert `_mm_cmp_epi8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index cb1cd29c92..93de2e0514 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -4078,17 +4078,13 @@ pub unsafe fn _mm256_mask_cmp_epi8_mask( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi8_mask&expand=705) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i, imm8: i32) -> __mmask16 { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_i8x16(); let b = b.as_i8x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpb128(a, b, $imm3, 0b11111111_11111111) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpb128(a, b, IMM8, 0b11111111_11111111); transmute(r) } @@ -13566,7 +13562,7 @@ mod tests { unsafe fn test_mm_cmp_epi8_mask() { let a = _mm_set1_epi8(0); let b = _mm_set1_epi8(1); - let m = _mm_cmp_epi8_mask(a, b, _MM_CMPINT_LT); + let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); assert_eq!(m, 0b11111111_11111111); } From 513b2986b606da90d758a0f021e6d0fcded6afb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 22:49:27 +0100 Subject: [PATCH 081/123] convert `_mm_mask_cmp_epi8_mask` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 93de2e0514..5f48d76cb7 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -4093,22 +4093,17 @@ pub unsafe fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi8_mask&expand=706) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))] -pub unsafe fn _mm_mask_cmp_epi8_mask( +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] +pub unsafe fn _mm_mask_cmp_epi8_mask( k1: __mmask16, a: __m128i, b: __m128i, - imm8: i32, ) -> __mmask16 { + static_assert_imm3!(IMM8); let a = a.as_i8x16(); let b = b.as_i8x16(); - macro_rules! 
call { - ($imm3:expr) => { - vpcmpb128(a, b, $imm3, k1) - }; - } - let r = constify_imm3!(imm8, call); + let r = vpcmpb128(a, b, IMM8, k1); transmute(r) } @@ -13571,7 +13566,7 @@ mod tests { let a = _mm_set1_epi8(0); let b = _mm_set1_epi8(1); let mask = 0b01010101_01010101; - let r = _mm_mask_cmp_epi8_mask(mask, a, b, _MM_CMPINT_LT); + let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); assert_eq!(r, 0b01010101_01010101); } From cf03e05d894cfe5d5ac2856f2767cd5f767c949f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 23:23:51 +0100 Subject: [PATCH 082/123] convert `_mm512_slli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- crates/core_arch/src/x86/macros.rs | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 5f48d76cb7..61614f8ff3 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5008,16 +5008,12 @@ pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi16&expand=5301) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_slli_epi16(a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_slli_epi16(a: __m512i) -> __m512i { + static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpslliw(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpslliw(a, IMM8); transmute(r) } @@ -14476,7 +14472,7 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_slli_epi16() { let a = _mm512_set1_epi16(1 << 15); - let r = _mm512_slli_epi16(a, 1); + let r = _mm512_slli_epi16::<1>(a); let e = _mm512_set1_epi16(0); assert_eq_m512i(r, e); } diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index 8cacbf44c5..8a6f025d53 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -32,6 +32,23 @@ macro_rules! static_assert_sae { }; } +// Helper struct used to trigger const eval errors when the unsigned const generic immediate value +// `IMM` is out of `[MIN-MAX]` range. +pub(crate) struct ValidateConstImmU32; +impl ValidateConstImmU32 { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM >= MIN && IMM <= MAX) as usize); + }; +} + +#[allow(unused_macros)] +macro_rules! static_assert_imm_u8 { + ($imm:ident) => { + let _ = + $crate::core_arch::x86::macros::ValidateConstImmU32::<$imm, 0, { (1 << 8) - 1 }>::VALID; + }; +} + macro_rules! 
constify_imm3 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] From d641e7d2b376017ed77fac67ab72742114def126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 23:27:08 +0100 Subject: [PATCH 083/123] convert `_mm512_mask_slli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 61614f8ff3..dd0790bf86 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5022,16 +5022,16 @@ pub unsafe fn _mm512_slli_epi16(a: __m512i) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi16&expand=5299) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_slli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_slli_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, +) -> __m512i { + static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpslliw(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let shf = vpslliw(a, IMM8); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -14480,9 +14480,9 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_mask_slli_epi16() { let a = _mm512_set1_epi16(1 << 15); - let r = _mm512_mask_slli_epi16(a, 0, a, 1); + let r = _mm512_mask_slli_epi16::<1>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_slli_epi16(a, 0b11111111_11111111_11111111_11111111, a, 1); + let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a); let e = _mm512_set1_epi16(0); assert_eq_m512i(r, e); } From af219716901fbb1f3ee0ecd9720571665c902fb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 23:28:48 +0100 Subject: [PATCH 084/123] convert `_mm512_maskz_slli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index dd0790bf86..1589bd4b95 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5040,16 +5040,12 @@ pub unsafe fn _mm512_mask_slli_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi16&expand=5300) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i) -> __m512i { + static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpslliw(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let shf = vpslliw(a, IMM8); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -14490,9 +14486,9 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_maskz_slli_epi16() { let a = _mm512_set1_epi16(1 << 15); - let r = _mm512_maskz_slli_epi16(0, a, 1); + let r = _mm512_maskz_slli_epi16::<1>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_slli_epi16(0b11111111_11111111_11111111_11111111, a, 1); + let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a); let e = _mm512_set1_epi16(0); assert_eq_m512i(r, e); } From fa85d17e34ead9efe943f451fdd2d9f6ddccd9b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:08:24 +0100 Subject: [PATCH 085/123] convert `_mm512_srli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 1589bd4b95..1c863a6d10 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5328,16 +5328,12 @@ pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi16&expand=5513) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_srli_epi16(a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_srli_epi16(a: __m512i) -> __m512i { + static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpsrliw(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpsrliw(a, IMM8); transmute(r) } @@ -14704,7 +14700,7 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_srli_epi16() { let a = _mm512_set1_epi16(1 << 1); - let r = _mm512_srli_epi16(a, 2); + let r = _mm512_srli_epi16::<2>(a); let e = _mm512_set1_epi16(0); assert_eq_m512i(r, e); } From e38485e5919b889008b2d5999a24594dd78a55f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:09:50 +0100 Subject: [PATCH 086/123] convert `_mm512_mask_srli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 1c863a6d10..ccd5d27a3d 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5342,16 +5342,16 @@ pub unsafe fn _mm512_srli_epi16(a: __m512i) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi16&expand=5511) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_srli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_srli_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, +) -> __m512i { + static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpsrliw(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let shf = vpsrliw(a, IMM8); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -14708,9 +14708,9 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_mask_srli_epi16() { let a = _mm512_set1_epi16(1 << 1); - let r = _mm512_mask_srli_epi16(a, 0, a, 2); + let r = _mm512_mask_srli_epi16::<2>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_srli_epi16(a, 0b11111111_11111111_11111111_11111111, a, 2); + let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); let e = _mm512_set1_epi16(0); assert_eq_m512i(r, e); } From e7b263692bbe330f0ef27a8ae6eeec684b861813 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:14:10 +0100 Subject: [PATCH 087/123] convert `_mm512_maskz_srli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index ccd5d27a3d..8c5e176f7c 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5360,17 +5360,13 @@ pub unsafe fn _mm512_mask_srli_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi16&expand=5512) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); //imm8 should be u32, it seems the document to verify is incorrect let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpsrliw(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let shf = vpsrliw(a, IMM8 as u32); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -14718,9 +14714,9 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_maskz_srli_epi16() { let a = _mm512_set1_epi16(1 << 1); - let r = _mm512_maskz_srli_epi16(0, a, 2); + let r = _mm512_maskz_srli_epi16::<2>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_srli_epi16(0b11111111_11111111_11111111_11111111, a, 2); + let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a); let e = _mm512_set1_epi16(0); assert_eq_m512i(r, e); } From af23e95974a157989f8076bf22c73d7b1fb4a891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:16:50 +0100 Subject: [PATCH 088/123] convert `_mm256_mask_srli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 8c5e176f7c..9bb15db5e5 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5376,15 +5376,15 @@ pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srli_epi16&expand=5508) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_srli_epi16(src: __m256i, k: __mmask16, a: __m256i, imm8: i32) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_srli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_srli_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let shf = _mm256_srli_epi16::(a); transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16())) } @@ -14724,9 +14724,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_mask_srli_epi16() { let a = _mm256_set1_epi16(1 << 1); - let r = _mm256_mask_srli_epi16(a, 0, a, 2); + let r = _mm256_mask_srli_epi16::<2>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_srli_epi16(a, 0b11111111_11111111, a, 2); + let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a); let e = _mm256_set1_epi16(0); assert_eq_m256i(r, e); } From 4150a3947e63cb052e3e5570d6f55acbb7ecc8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:26:51 +0100 Subject: [PATCH 089/123] convert `_mm256_maskz_srli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 9bb15db5e5..6a9a29985d 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5393,15 +5393,11 @@ pub unsafe fn _mm256_mask_srli_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srli_epi16&expand=5509) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i, imm8: i32) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_srli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let shf = _mm256_srli_epi16::(a); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shf.as_i16x16(), zero)) } @@ -14734,9 +14730,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_maskz_srli_epi16() { let a = _mm256_set1_epi16(1 << 1); - let r = _mm256_maskz_srli_epi16(0, a, 2); + let r = _mm256_maskz_srli_epi16::<2>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_srli_epi16(0b11111111_11111111, a, 2); + let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a); let e = _mm256_set1_epi16(0); assert_eq_m256i(r, e); } From 73aae2a24ff5129e917f97145861cb6952fa723d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:28:27 +0100 Subject: [PATCH 090/123] convert `_mm_mask_srli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 6a9a29985d..ba88db340f 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5407,15 +5407,15 @@ pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srli_epi16&expand=5505) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_srli_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_srli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_srli_epi16( + src: __m128i, + k: __mmask8, + a: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let shf = _mm_srli_epi16::(a); transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8())) } @@ -14740,9 +14740,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_mask_srli_epi16() { let a = _mm_set1_epi16(1 << 1); - let r = _mm_mask_srli_epi16(a, 0, a, 2); + let r = _mm_mask_srli_epi16::<2>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_srli_epi16(a, 0b11111111, a, 2); + let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a); let e = _mm_set1_epi16(0); assert_eq_m128i(r, e); } From 0efea1cf774b5a0ed762007b500cef1d7562c35b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:30:12 +0100 Subject: [PATCH 091/123] convert `_mm_maskz_srli_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index ba88db340f..fa954a64d4 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5424,15 +5424,11 @@ pub unsafe fn _mm_mask_srli_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srli_epi16&expand=5506) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_srli_epi16::<$imm8>(a) - }; - } - let shf = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let shf = _mm_srli_epi16::(a); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shf.as_i16x8(), zero)) } @@ -14750,9 +14746,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_maskz_srli_epi16() { let a = _mm_set1_epi16(1 << 1); - let r = _mm_maskz_srli_epi16(0, a, 2); + let r = _mm_maskz_srli_epi16::<2>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_srli_epi16(0b11111111, a, 2); + let r = _mm_maskz_srli_epi16::<2>(0b11111111, a); let e = _mm_set1_epi16(0); assert_eq_m128i(r, e); } From 89687f2c14b89a0efc8a738687225479721eac2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:32:19 +0100 Subject: [PATCH 092/123] convert `_mm512_srai_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index fa954a64d4..fb9de3db10 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5641,16 +5641,12 @@ pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi16&expand=5427) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_srai_epi16(a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_srai_epi16(a: __m512i) -> __m512i { + 
static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! call { - ($imm8:expr) => { - vpsraiw(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpsraiw(a, IMM8); transmute(r) } @@ -14924,7 +14920,7 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_srai_epi16() { let a = _mm512_set1_epi16(8); - let r = _mm512_srai_epi16(a, 2); + let r = _mm512_srai_epi16::<2>(a); let e = _mm512_set1_epi16(2); assert_eq_m512i(r, e); } From 054acf5e984499504e07f31cdd4ff0f5e3833a6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:33:59 +0100 Subject: [PATCH 093/123] convert `_mm512_mask_srai_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index fb9de3db10..f977225cbb 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5655,16 +5655,16 @@ pub unsafe fn _mm512_srai_epi16(a: __m512i) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi16&expand=5425) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_srai_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_srai_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, +) -> __m512i { + static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpsraiw(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let shf = vpsraiw(a, IMM8); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) } @@ -14928,9 +14928,9 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_mask_srai_epi16() { let a = _mm512_set1_epi16(8); - let r = _mm512_mask_srai_epi16(a, 0, a, 2); + let r = _mm512_mask_srai_epi16::<2>(a, 0, a); assert_eq_m512i(r, a); - let r = _mm512_mask_srai_epi16(a, 0b11111111_11111111_11111111_11111111, a, 2); + let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); let e = _mm512_set1_epi16(2); assert_eq_m512i(r, e); } From 80294a89db8ef18b9b88745f5219fdae2c14c959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:35:09 +0100 Subject: [PATCH 094/123] convert `_mm512_maskz_srai_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index f977225cbb..960cb98e2e 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5673,16 +5673,12 @@ pub unsafe fn _mm512_mask_srai_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi16&expand=5426) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i, imm8: u32) -> __m512i { +#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i) -> __m512i { + static_assert_imm_u8!(IMM8); let a = a.as_i16x32(); - macro_rules! 
call { - ($imm8:expr) => { - vpsraiw(a, $imm8) - }; - } - let shf = constify_imm8_sae!(imm8, call); + let shf = vpsraiw(a, IMM8); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -14938,9 +14934,9 @@ mod tests { #[simd_test(enable = "avx512bw")] unsafe fn test_mm512_maskz_srai_epi16() { let a = _mm512_set1_epi16(8); - let r = _mm512_maskz_srai_epi16(0, a, 2); + let r = _mm512_maskz_srai_epi16::<2>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_srai_epi16(0b11111111_11111111_11111111_11111111, a, 2); + let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a); let e = _mm512_set1_epi16(2); assert_eq_m512i(r, e); } From 4c0fec9b0e57b4123ae198b3182fb53dd392d572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:42:07 +0100 Subject: [PATCH 095/123] convert `_mm512_mask_shufflelo_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 960cb98e2e..3348dff88a 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7194,20 +7194,15 @@ pub unsafe fn _mm512_shufflelo_epi16(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflelo_epi16&expand=5219) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_shufflelo_epi16( +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_shufflelo_epi16( src: __m512i, k: __mmask32, a: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shufflelo_epi16(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let r = _mm512_shufflelo_epi16(a, IMM8); transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) } @@ -16332,10 +16327,13 @@ mod tests { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = _mm512_mask_shufflelo_epi16(a, 0, a, 0b00_01_01_11); + let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); assert_eq_m512i(r, a); - let r = - _mm512_mask_shufflelo_epi16(a, 0b11111111_11111111_11111111_11111111, a, 0b00_01_01_11); + let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>( + a, + 0b11111111_11111111_11111111_11111111, + a, + ); #[rustfmt::skip] let e = _mm512_set_epi16( 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, From 61990f6f7fd4de14b7c36c74512e7db365310213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:43:26 +0100 Subject: [PATCH 096/123] convert `_mm512_maskz_shufflelo_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 3348dff88a..7ecbc8a9ef 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7211,15 +7211,11 @@ pub unsafe fn _mm512_mask_shufflelo_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflelo_epi16&expand=5220) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_shufflelo_epi16(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let r = _mm512_shufflelo_epi16(a, IMM8); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) } @@ -16349,10 +16345,10 @@ mod tests { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = _mm512_maskz_shufflelo_epi16(0, a, 0b00_01_01_11); + let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); let r = - _mm512_maskz_shufflelo_epi16(0b11111111_11111111_11111111_11111111, a, 0b00_01_01_11); + _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); #[rustfmt::skip] let e = _mm512_set_epi16( 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, From 02901748dbc281eb9e833edbfe8c53177c20006e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:44:34 +0100 Subject: [PATCH 097/123] convert `_mm256_mask_shufflelo_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 7ecbc8a9ef..80290225bd 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7225,20 +7225,15 @@ pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflelo_epi16&expand=5216) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub 
unsafe fn _mm256_mask_shufflelo_epi16( +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_shufflelo_epi16( src: __m256i, k: __mmask16, a: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shufflelo_epi16(a, $imm8) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let shuffle = _mm256_shufflelo_epi16(a, IMM8); transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) } @@ -16360,9 +16355,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_mask_shufflelo_epi16() { let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = _mm256_mask_shufflelo_epi16(a, 0, a, 0b00_01_01_11); + let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_shufflelo_epi16(a, 0b11111111_11111111, a, 0b00_01_01_11); + let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); assert_eq_m256i(r, e); } From b7bc560d7e7653b230180d1cc464054dc857b9a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:45:15 +0100 Subject: [PATCH 098/123] convert `_mm256_maskz_shufflelo_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 80290225bd..716e14da36 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7242,15 +7242,11 @@ pub unsafe fn _mm256_mask_shufflelo_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflelo_epi16&expand=5217) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, 
assert_instr(vpshuflw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_shufflelo_epi16(k: __mmask16, a: __m256i, imm8: i32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shufflelo_epi16(a, $imm8) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_shufflelo_epi16(k: __mmask16, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let shuffle = _mm256_shufflelo_epi16(a, IMM8); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) } @@ -16365,9 +16361,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_maskz_shufflelo_epi16() { let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = _mm256_maskz_shufflelo_epi16(0, a, 0b00_01_01_11); + let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shufflelo_epi16(0b11111111_11111111, a, 0b00_01_01_11); + let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a); let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); assert_eq_m256i(r, e); } From 121b417e1556ab2c812bac07bf4183d49356da35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:46:48 +0100 Subject: [PATCH 099/123] convert `_mm_mask_shufflelo_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 716e14da36..48647ec156 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7256,20 +7256,15 @@ pub unsafe fn _mm256_maskz_shufflelo_epi16(k: __mmask16, a: __m /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflelo_epi16&expand=5213) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_shufflelo_epi16( +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_shufflelo_epi16( src: __m128i, k: __mmask8, a: __m128i, - imm8: i32, ) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_shufflelo_epi16::<$imm8>(a) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let shuffle = _mm_shufflelo_epi16::(a); transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) } @@ -16371,9 +16366,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_mask_shufflelo_epi16() { let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); - let r = _mm_mask_shufflelo_epi16(a, 0, a, 0b00_01_01_11); + let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_shufflelo_epi16(a, 0b11111111, a, 0b00_01_01_11); + let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a); let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); assert_eq_m128i(r, e); } From 0fa05ec8f3538d746a4f40e9d58b09a90e9dd9ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:48:07 +0100 Subject: [PATCH 100/123] convert `_mm_maskz_shufflelo_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 48647ec156..5ae8f1041b 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7273,15 +7273,11 @@ pub unsafe fn _mm_mask_shufflelo_epi16( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflelo_epi16&expand=5214) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_shufflelo_epi16::<$imm8>(a) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let shuffle = _mm_shufflelo_epi16::(a); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero)) } @@ -16376,9 +16372,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_maskz_shufflelo_epi16() { let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); - let r = _mm_maskz_shufflelo_epi16(0, a, 0b00_01_01_11); + let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shufflelo_epi16(0b11111111, a, 0b00_01_01_11); + let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a); let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); assert_eq_m128i(r, e); } From b893ded37a3d0e3d551cbf3af9b7f41891d923dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:49:19 +0100 Subject: [PATCH 101/123] convert `_mm512_mask_shufflehi_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 5ae8f1041b..7d70e463da 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7345,20 +7345,15 @@ pub unsafe fn _mm512_shufflehi_epi16(a: __m512i, 
imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflehi_epi16&expand=5210) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_shufflehi_epi16( +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_shufflehi_epi16( src: __m512i, k: __mmask32, a: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shufflehi_epi16(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let r = _mm512_shufflehi_epi16(a, IMM8); transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) } @@ -16402,10 +16397,13 @@ mod tests { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = _mm512_mask_shufflehi_epi16(a, 0, a, 0b00_01_01_11); + let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); assert_eq_m512i(r, a); - let r = - _mm512_mask_shufflehi_epi16(a, 0b11111111_11111111_11111111_11111111, a, 0b00_01_01_11); + let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>( + a, + 0b11111111_11111111_11111111_11111111, + a, + ); #[rustfmt::skip] let e = _mm512_set_epi16( 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, From 9ccb5d0a70fa8a95b0c1676f9478ef9f29fd6031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:50:18 +0100 Subject: [PATCH 102/123] convert `_mm512_maskz_shufflehi_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 7d70e463da..847c6b2e65 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7362,15 +7362,11 
@@ pub unsafe fn _mm512_mask_shufflehi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflehi_epi16&expand=5211) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - _mm512_shufflehi_epi16(a, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + let r = _mm512_shufflehi_epi16(a, IMM8); let zero = _mm512_setzero_si512().as_i16x32(); transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) } @@ -16419,10 +16415,10 @@ mod tests { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = _mm512_maskz_shufflehi_epi16(0, a, 0b00_01_01_11); + let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); let r = - _mm512_maskz_shufflehi_epi16(0b11111111_11111111_11111111_11111111, a, 0b00_01_01_11); + _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); #[rustfmt::skip] let e = _mm512_set_epi16( 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, From 492e85844c38452265b8c9dbfb28022b87064111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:51:41 +0100 Subject: [PATCH 103/123] convert `_mm256_mask_shufflehi_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 847c6b2e65..fc957f54a9 100644 --- 
a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7376,20 +7376,15 @@ pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflehi_epi16&expand=5207) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_shufflehi_epi16( +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_shufflehi_epi16( src: __m256i, k: __mmask16, a: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shufflehi_epi16(a, $imm8) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let shuffle = _mm256_shufflehi_epi16(a, IMM8); transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) } @@ -16430,9 +16425,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_mask_shufflehi_epi16() { let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = _mm256_mask_shufflehi_epi16(a, 0, a, 0b00_01_01_11); + let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); assert_eq_m256i(r, a); - let r = _mm256_mask_shufflehi_epi16(a, 0b11111111_11111111, a, 0b00_01_01_11); + let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); assert_eq_m256i(r, e); } From 1a8977846a1cc1322d52dd859aecfb929cc7d767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:52:25 +0100 Subject: [PATCH 104/123] convert `_mm256_maskz_shufflehi_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs 
b/crates/core_arch/src/x86/avx512bw.rs index fc957f54a9..c40a0db161 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7393,15 +7393,11 @@ pub unsafe fn _mm256_mask_shufflehi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflehi_epi16&expand=5208) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m256i, imm8: i32) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_shufflehi_epi16(a, $imm8) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + let shuffle = _mm256_shufflehi_epi16(a, IMM8); let zero = _mm256_setzero_si256().as_i16x16(); transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) } @@ -16435,9 +16431,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm256_maskz_shufflehi_epi16() { let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = _mm256_maskz_shufflehi_epi16(0, a, 0b00_01_01_11); + let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_shufflehi_epi16(0b11111111_11111111, a, 0b00_01_01_11); + let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a); let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); assert_eq_m256i(r, e); } From a6df7821c7b85a69cba3bd60640cf1ccab65e43c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:53:30 +0100 Subject: [PATCH 105/123] convert `_mm_mask_shufflehi_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs 
| 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index c40a0db161..d9d04de63f 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7407,20 +7407,15 @@ pub unsafe fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflehi_epi16&expand=5204) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 5))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_shufflehi_epi16( +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_shufflehi_epi16( src: __m128i, k: __mmask8, a: __m128i, - imm8: i32, ) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_shufflehi_epi16::<$imm8>(a) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let shuffle = _mm_shufflehi_epi16::(a); transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) } @@ -16441,9 +16436,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_mask_shufflehi_epi16() { let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); - let r = _mm_mask_shufflehi_epi16(a, 0, a, 0b00_01_01_11); + let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); assert_eq_m128i(r, a); - let r = _mm_mask_shufflehi_epi16(a, 0b11111111, a, 0b00_01_01_11); + let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a); let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7); assert_eq_m128i(r, e); } From 7c6bb9a5d6355517b8c451336be09f063a79e2d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:54:29 +0100 Subject: [PATCH 106/123] convert `_mm_maskz_shufflehi_epi16` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 
insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index d9d04de63f..24a63bbcb2 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7424,15 +7424,11 @@ pub unsafe fn _mm_mask_shufflehi_epi16( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflehi_epi16&expand=5205) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 5))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_shufflehi_epi16::<$imm8>(a) - }; - } - let shuffle = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i) -> __m128i { + static_assert_imm8!(IMM8); + let shuffle = _mm_shufflehi_epi16::(a); let zero = _mm_setzero_si128().as_i16x8(); transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero)) } @@ -16446,9 +16442,9 @@ mod tests { #[simd_test(enable = "avx512bw,avx512vl")] unsafe fn test_mm_maskz_shufflehi_epi16() { let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); - let r = _mm_maskz_shufflehi_epi16(0, a, 0b00_01_01_11); + let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_shufflehi_epi16(0b11111111, a, 0b00_01_01_11); + let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a); let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7); assert_eq_m128i(r, e); } From bee706eb644eab14db5bf6c1b9daf9a1867964bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:56:11 +0100 Subject: [PATCH 107/123] convert `_mm512_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 
10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 24a63bbcb2..fdbde910c7 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7865,17 +7865,13 @@ pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dbsad_epu8&expand=2114) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm512_dbsad_epu8(a: __m512i, b: __m512i, imm8: i32) -> __m512i { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn _mm512_dbsad_epu8(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_u8x64(); let b = b.as_u8x64(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw(a, b, IMM8); transmute(r) } @@ -16870,7 +16866,7 @@ mod tests { unsafe fn test_mm512_dbsad_epu8() { let a = _mm512_set1_epi8(2); let b = _mm512_set1_epi8(4); - let r = _mm512_dbsad_epu8(a, b, 0); + let r = _mm512_dbsad_epu8::<0>(a, b); let e = _mm512_set1_epi16(8); assert_eq_m512i(r, e); } From 3c2225a7d59c66acb9c79c1e510368a6a5634c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:57:08 +0100 Subject: [PATCH 108/123] convert `_mm512_mask_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index fdbde910c7..e868e00349 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7880,23 +7880,18 @@ pub unsafe fn _mm512_dbsad_epu8(a: __m512i, b: __m512i) -> __m5 /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dbsad_epu8&expand=2115) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(4)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm512_mask_dbsad_epu8( +#[rustc_legacy_const_generics(4)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn _mm512_mask_dbsad_epu8( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_u8x64(); let b = b.as_u8x64(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw(a, b, IMM8); transmute(simd_select_bitmask(k, r, src.as_u16x32())) } @@ -16876,9 +16871,9 @@ mod tests { let src = _mm512_set1_epi16(1); let a = _mm512_set1_epi8(2); let b = _mm512_set1_epi8(4); - let r = _mm512_mask_dbsad_epu8(src, 0, a, b, 0); + let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b); assert_eq_m512i(r, src); - let r = _mm512_mask_dbsad_epu8(src, 0b11111111_11111111_11111111_11111111, a, b, 0); + let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(8); assert_eq_m512i(r, e); } From b431f969a9816365c93112dd3bd2acc6d0294a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:58:10 +0100 Subject: [PATCH 109/123] convert `_mm512_maskz_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index e868e00349..4247eeb13e 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7900,17 +7900,17 @@ pub unsafe fn _mm512_mask_dbsad_epu8( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dbsad_epu8&expand=2116) #[inline] #[target_feature(enable = "avx512bw")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm512_maskz_dbsad_epu8(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i { +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn _mm512_maskz_dbsad_epu8( + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_u8x64(); let b = b.as_u8x64(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw(a, b, IMM8); transmute(simd_select_bitmask( k, r, @@ -16882,9 +16882,9 @@ mod tests { unsafe fn test_mm512_maskz_dbsad_epu8() { let a = _mm512_set1_epi8(2); let b = _mm512_set1_epi8(4); - let r = _mm512_maskz_dbsad_epu8(0, a, b, 0); + let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_dbsad_epu8(0b11111111_11111111_11111111_11111111, a, b, 0); + let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b); let e = _mm512_set1_epi16(8); assert_eq_m512i(r, e); } From b9629ebbb956ebbbc9ad786cd2a589e6522fa113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:58:50 +0100 Subject: [PATCH 110/123] convert `_mm256_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 4247eeb13e..49b9864b67 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7923,17 +7923,13 @@ pub unsafe fn _mm512_maskz_dbsad_epu8( /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dbsad_epu8&expand=2111) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm256_dbsad_epu8(a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn _mm256_dbsad_epu8(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_u8x32(); let b = b.as_u8x32(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw256(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw256(a, b, IMM8); transmute(r) } @@ -16893,7 +16889,7 @@ mod tests { unsafe fn test_mm256_dbsad_epu8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); - let r = _mm256_dbsad_epu8(a, b, 0); + let r = _mm256_dbsad_epu8::<0>(a, b); let e = _mm256_set1_epi16(8); assert_eq_m256i(r, e); } From ad1145d4d86bd35d3d543cf40d4fd7b67f810c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 00:59:38 +0100 Subject: [PATCH 111/123] convert `_mm256_mask_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 49b9864b67..efcc6fc186 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7938,23 +7938,18 @@ pub unsafe fn _mm256_dbsad_epu8(a: __m256i, b: __m256i) -> __m2 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dbsad_epu8&expand=2112) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(4)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm256_mask_dbsad_epu8( +#[rustc_legacy_const_generics(4)] 
+#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn _mm256_mask_dbsad_epu8( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_u8x32(); let b = b.as_u8x32(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw256(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw256(a, b, IMM8); transmute(simd_select_bitmask(k, r, src.as_u16x16())) } @@ -16899,9 +16894,9 @@ mod tests { let src = _mm256_set1_epi16(1); let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); - let r = _mm256_mask_dbsad_epu8(src, 0, a, b, 0); + let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b); assert_eq_m256i(r, src); - let r = _mm256_mask_dbsad_epu8(src, 0b11111111_11111111, a, b, 0); + let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b); let e = _mm256_set1_epi16(8); assert_eq_m256i(r, e); } From 7095318f845394cbf9c0f98fc06508d483d2a55e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:00:10 +0100 Subject: [PATCH 112/123] convert `_mm256_maskz_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index efcc6fc186..a8cf3db3ff 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7958,17 +7958,13 @@ pub unsafe fn _mm256_mask_dbsad_epu8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dbsad_epu8&expand=2113) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm256_maskz_dbsad_epu8(k: __mmask16, a: __m256i, b: __m256i, imm8: i32) -> __m256i { +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn 
_mm256_maskz_dbsad_epu8(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_u8x32(); let b = b.as_u8x32(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw256(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw256(a, b, IMM8); transmute(simd_select_bitmask( k, r, @@ -16905,9 +16901,9 @@ mod tests { unsafe fn test_mm256_maskz_dbsad_epu8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); - let r = _mm256_maskz_dbsad_epu8(0, a, b, 0); + let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_dbsad_epu8(0b11111111_11111111, a, b, 0); + let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b); let e = _mm256_set1_epi16(8); assert_eq_m256i(r, e); } From 75bc3ceb94b18cfd3b54c3285d63444f1a215df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:00:51 +0100 Subject: [PATCH 113/123] convert `_mm_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index a8cf3db3ff..b95c73fff7 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7960,7 +7960,11 @@ pub unsafe fn _mm256_mask_dbsad_epu8( #[target_feature(enable = "avx512bw,avx512vl")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm256_maskz_dbsad_epu8(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { +pub unsafe fn _mm256_maskz_dbsad_epu8( + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { static_assert_imm8!(IMM8); let a = a.as_u8x32(); let b = b.as_u8x32(); @@ -7977,17 +7981,13 @@ pub unsafe fn _mm256_maskz_dbsad_epu8(k: __mmask16, a: __m256i, /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dbsad_epu8&expand=2108) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm_dbsad_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn _mm_dbsad_epu8(a: __m128i, b: __m128i) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_u8x16(); let b = b.as_u8x16(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw128(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw128(a, b, IMM8); transmute(r) } @@ -16912,7 +16912,7 @@ mod tests { unsafe fn test_mm_dbsad_epu8() { let a = _mm_set1_epi8(2); let b = _mm_set1_epi8(4); - let r = _mm_dbsad_epu8(a, b, 0); + let r = _mm_dbsad_epu8::<0>(a, b); let e = _mm_set1_epi16(8); assert_eq_m128i(r, e); } From c9f5a2ed35ff2f84524548d78491100d6e582158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:01:56 +0100 Subject: [PATCH 114/123] convert `_mm_mask_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index b95c73fff7..034cdb8745 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7996,23 +7996,18 @@ pub unsafe fn _mm_dbsad_epu8(a: __m128i, b: __m128i) -> __m128i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dbsad_epu8&expand=2109) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(4)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm_mask_dbsad_epu8( +#[rustc_legacy_const_generics(4)] +#[cfg_attr(test, assert_instr(vdbpsadbw, 
IMM8 = 0))] +pub unsafe fn _mm_mask_dbsad_epu8( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_u8x16(); let b = b.as_u8x16(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw128(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw128(a, b, IMM8); transmute(simd_select_bitmask(k, r, src.as_u16x8())) } @@ -16922,9 +16917,9 @@ mod tests { let src = _mm_set1_epi16(1); let a = _mm_set1_epi8(2); let b = _mm_set1_epi8(4); - let r = _mm_mask_dbsad_epu8(src, 0, a, b, 0); + let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b); assert_eq_m128i(r, src); - let r = _mm_mask_dbsad_epu8(src, 0b11111111, a, b, 0); + let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b); let e = _mm_set1_epi16(8); assert_eq_m128i(r, e); } From 79fd47b740ce4de5240c93b71938c1421f062035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:02:34 +0100 Subject: [PATCH 115/123] convert `_mm_maskz_dbsad_epu8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 034cdb8745..d0dee28c95 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -8016,17 +8016,17 @@ pub unsafe fn _mm_mask_dbsad_epu8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dbsad_epu8&expand=2110) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))] -pub unsafe fn _mm_maskz_dbsad_epu8(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i { +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] +pub unsafe fn _mm_maskz_dbsad_epu8( + k: __mmask8, + a: __m128i, + b: __m128i, +) -> __m128i { + 
static_assert_imm8!(IMM8); let a = a.as_u8x16(); let b = b.as_u8x16(); - macro_rules! call { - ($imm8:expr) => { - vdbpsadbw128(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vdbpsadbw128(a, b, IMM8); transmute(simd_select_bitmask(k, r, _mm_setzero_si128().as_u16x8())) } @@ -16928,9 +16928,9 @@ mod tests { unsafe fn test_mm_maskz_dbsad_epu8() { let a = _mm_set1_epi8(2); let b = _mm_set1_epi8(4); - let r = _mm_maskz_dbsad_epu8(0, a, b, 0); + let r = _mm_maskz_dbsad_epu8::<0>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_dbsad_epu8(0b11111111, a, b, 0); + let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b); let e = _mm_set1_epi16(8); assert_eq_m128i(r, e); } From 7c1c2a8fa859904e56790397023adea2d80f0044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:04:24 +0100 Subject: [PATCH 116/123] convert `_mm512_mask_alignr_epi8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index d0dee28c95..0b7e564eb4 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -9056,21 +9056,16 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi8&expand=264) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpalignr, imm8 = 1))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_alignr_epi8( +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_alignr_epi8( src: __m512i, k: __mmask64, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_alignr_epi8(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let r = _mm512_alignr_epi8(a, b, IMM8); transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64())) } @@ -17705,14 +17700,13 @@ mod tests { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ); let b = _mm512_set1_epi8(1); - let r = _mm512_mask_alignr_epi8(a, 0, a, b, 14); + let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b); assert_eq_m512i(r, a); - let r = _mm512_mask_alignr_epi8( + let r = _mm512_mask_alignr_epi8::<14>( a, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, a, b, - 14, ); #[rustfmt::skip] let e = _mm512_set_epi8( From 7ae78308ffd6e4f72bfbd65c2c685d969b510bfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:06:07 +0100 Subject: [PATCH 117/123] convert `_mm512_maskz_alignr_epi8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 0b7e564eb4..6438b7196b 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -9074,15 +9074,15 @@ pub unsafe fn _mm512_mask_alignr_epi8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi8&expand=265) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(vpalignr, imm8 = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_alignr_epi8(k: __mmask64, a: __m512i, b: __m512i, imm8: i32) -> __m512i { - macro_rules! 
call { - ($imm8:expr) => { - _mm512_alignr_epi8(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_alignr_epi8( + k: __mmask64, + a: __m512i, + b: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); + let r = _mm512_alignr_epi8(a, b, IMM8); let zero = _mm512_setzero_si512().as_i8x64(); transmute(simd_select_bitmask(k, r.as_i8x64(), zero)) } @@ -17728,13 +17728,12 @@ mod tests { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ); let b = _mm512_set1_epi8(1); - let r = _mm512_maskz_alignr_epi8(0, a, b, 14); + let r = _mm512_maskz_alignr_epi8::<14>(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_alignr_epi8( + let r = _mm512_maskz_alignr_epi8::<14>( 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, a, b, - 14, ); #[rustfmt::skip] let e = _mm512_set_epi8( From 4773594ebfe9a861c76f583fd32547b04eb9edef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:07:00 +0100 Subject: [PATCH 118/123] convert `_mm256_mask_alignr_epi8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 6438b7196b..82c138abf2 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -9092,21 +9092,16 @@ pub unsafe fn _mm512_maskz_alignr_epi8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_alignr_epi8&expand=261) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(4)] -#[cfg_attr(test, assert_instr(vpalignr, imm8 = 5))] -pub unsafe fn _mm256_mask_alignr_epi8( +#[rustc_legacy_const_generics(4)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub unsafe fn _mm256_mask_alignr_epi8( 
src: __m256i, k: __mmask32, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { - macro_rules! call { - ($imm8:expr) => { - _mm256_alignr_epi8(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let r = _mm256_alignr_epi8(a, b, IMM8); transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32())) } @@ -17753,9 +17748,9 @@ mod tests { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ); let b = _mm256_set1_epi8(1); - let r = _mm256_mask_alignr_epi8(a, 0, a, b, 14); + let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b); assert_eq_m256i(r, a); - let r = _mm256_mask_alignr_epi8(a, 0b11111111_11111111_11111111_11111111, a, b, 14); + let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b); #[rustfmt::skip] let e = _mm256_set_epi8( 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, From 8cee2fb8c675235a59f397286de256a6de5da11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:08:32 +0100 Subject: [PATCH 119/123] convert `_mm256_maskz_alignr_epi8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 82c138abf2..ccf30f9d96 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -9110,15 +9110,15 @@ pub unsafe fn _mm256_mask_alignr_epi8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_alignr_epi8&expand=262) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpalignr, imm8 = 5))] -pub unsafe fn _mm256_maskz_alignr_epi8(k: __mmask32, a: __m256i, b: __m256i, imm8: i32) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - _mm256_alignr_epi8(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub unsafe fn _mm256_maskz_alignr_epi8( + k: __mmask32, + a: __m256i, + b: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); + let r = _mm256_alignr_epi8(a, b, IMM8); transmute(simd_select_bitmask( k, r.as_i8x32(), @@ -17767,9 +17767,9 @@ mod tests { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, ); let b = _mm256_set1_epi8(1); - let r = _mm256_maskz_alignr_epi8(0, a, b, 14); + let r = _mm256_maskz_alignr_epi8::<14>(0, a, b); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_alignr_epi8(0b11111111_11111111_11111111_11111111, a, b, 14); + let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b); #[rustfmt::skip] let e = _mm256_set_epi8( 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, From 6ba71df63923dff527fb54da7c8ff74e07e2cb6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:09:23 +0100 Subject: [PATCH 120/123] convert `_mm_mask_alignr_epi8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index ccf30f9d96..a98c445f38 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -9131,21 +9131,16 @@ pub unsafe fn _mm256_maskz_alignr_epi8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_alignr_epi8&expand=258) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(4)] -#[cfg_attr(test, assert_instr(vpalignr, imm8 = 5))] -pub unsafe fn _mm_mask_alignr_epi8( +#[rustc_legacy_const_generics(4)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub unsafe fn _mm_mask_alignr_epi8( src: __m128i, 
k: __mmask16, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { - macro_rules! call { - ($imm8:expr) => { - _mm_alignr_epi8(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + static_assert_imm8!(IMM8); + let r = _mm_alignr_epi8(a, b, IMM8); transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16())) } @@ -17782,9 +17777,9 @@ mod tests { unsafe fn test_mm_mask_alignr_epi8() { let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); let b = _mm_set1_epi8(1); - let r = _mm_mask_alignr_epi8(a, 0, a, b, 14); + let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b); assert_eq_m128i(r, a); - let r = _mm_mask_alignr_epi8(a, 0b11111111_11111111, a, b, 14); + let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b); let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); assert_eq_m128i(r, e); } From 310211182009e6135519072d9ee110a29dd6d80f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 01:10:25 +0100 Subject: [PATCH 121/123] convert `_mm_maskz_alignr_epi8` to const generics --- crates/core_arch/src/x86/avx512bw.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index a98c445f38..2128a828ff 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -9149,15 +9149,15 @@ pub unsafe fn _mm_mask_alignr_epi8( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_alignr_epi8&expand=259) #[inline] #[target_feature(enable = "avx512bw,avx512vl")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpalignr, imm8 = 5))] -pub unsafe fn _mm_maskz_alignr_epi8(k: __mmask16, a: __m128i, b: __m128i, imm8: i32) -> __m128i { - macro_rules! 
call { - ($imm8:expr) => { - _mm_alignr_epi8(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] +pub unsafe fn _mm_maskz_alignr_epi8( + k: __mmask16, + a: __m128i, + b: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); + let r = _mm_alignr_epi8(a, b, IMM8); let zero = _mm_setzero_si128().as_i8x16(); transmute(simd_select_bitmask(k, r.as_i8x16(), zero)) } @@ -17788,9 +17788,9 @@ mod tests { unsafe fn test_mm_maskz_alignr_epi8() { let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); let b = _mm_set1_epi8(1); - let r = _mm_maskz_alignr_epi8(0, a, b, 14); + let r = _mm_maskz_alignr_epi8::<14>(0, a, b); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_alignr_epi8(0b11111111_11111111, a, b, 14); + let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b); let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); assert_eq_m128i(r, e); } From 47c78f7dbc7ce8b92aa164787894f2e15c362af4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 4 Mar 2021 21:44:53 +0100 Subject: [PATCH 122/123] convert `_xabort` to const generics --- crates/core_arch/src/x86/rtm.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/core_arch/src/x86/rtm.rs b/crates/core_arch/src/x86/rtm.rs index 7cb1cc09bd..dab73cde9e 100644 --- a/crates/core_arch/src/x86/rtm.rs +++ b/crates/core_arch/src/x86/rtm.rs @@ -76,15 +76,11 @@ pub unsafe fn _xend() { /// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xabort). #[inline] #[target_feature(enable = "rtm")] -#[cfg_attr(test, assert_instr(xabort, imm8 = 0x0))] -#[rustc_args_required_const(0)] -pub unsafe fn _xabort(imm8: u32) { - macro_rules! 
call { - ($imm8:expr) => { - x86_xabort($imm8) - }; - } - constify_imm8!(imm8, call) +#[cfg_attr(test, assert_instr(xabort, IMM8 = 0x0))] +#[rustc_legacy_const_generics(0)] +pub unsafe fn _xabort() { + static_assert_imm_u8!(IMM8); + x86_xabort(IMM8 as i8) } /// Queries whether the processor is executing in a transactional region identified by restricted @@ -130,14 +126,14 @@ mod tests { unsafe fn test_xabort() { const ABORT_CODE: u32 = 42; // aborting outside a transactional region does nothing - _xabort(ABORT_CODE); + _xabort::(); for _ in 0..10 { let mut x = 0; let code = rtm::_xbegin(); if code == _XBEGIN_STARTED { x += 1; - rtm::_xabort(ABORT_CODE); + rtm::_xabort::(); } else if code & _XABORT_EXPLICIT != 0 { let test_abort_code = rtm::_xabort_code(code); assert_eq!(test_abort_code, ABORT_CODE); From cf0c158e15e0b5ef619be41e303d09d3c0a70f1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Fri, 5 Mar 2021 03:56:21 +0100 Subject: [PATCH 123/123] temporarily disable WASM CI The LLVM12 upgrade in rustc may be causing issues --- .github/workflows/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c7cec5a858..615a121b7a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,7 +77,7 @@ jobs: - mips64-unknown-linux-gnuabi64 - mips64el-unknown-linux-gnuabi64 - s390x-unknown-linux-gnu - - wasm32-wasi + # - wasm32-wasi - i586-unknown-linux-gnu - x86_64-linux-android - arm-linux-androideabi @@ -131,8 +131,8 @@ jobs: disable_assert_instr: true - target: s390x-unknown-linux-gnu os: ubuntu-latest - - target: wasm32-wasi - os: ubuntu-latest + # - target: wasm32-wasi + # os: ubuntu-latest - target: aarch64-apple-darwin os: macos-latest norun: true