From de85b1c6fe86fcf8626d390d3efe25ab462077e2 Mon Sep 17 00:00:00 2001 From: ArunTamil21 Date: Thu, 26 Feb 2026 00:42:04 +0000 Subject: [PATCH 1/4] Add missing runtime tests for alias intrinsics: _mm_cvt_ss2si, _mm_cvtt_ss2si, _mm_cvt_si2ss, _mm_set_ps1 --- crates/core_arch/src/x86/sse.rs | 71 +++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 3f7781cc7d..0ec842f9fc 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -3013,6 +3013,22 @@ mod tests { } } + #[simd_test(enable = "sse")] + fn test_mm_cvt_ss2si() { + let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; + let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520]; + for i in 0..inputs.len() { + let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); + let e = result[i]; + let r = _mm_cvt_ss2si(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvt_ss2si({:?}) = {}, expected: {}", + i, x, r, e + ); + } + } + #[simd_test(enable = "sse")] fn test_mm_cvttss_si32() { let inputs = &[ @@ -3038,6 +3054,31 @@ mod tests { } } + #[simd_test(enable = "sse")] + fn test_mm_cvtt_ss2si() { + let inputs = &[ + (42.0f32, 42i32), + (-31.4, -31), + (-33.5, -33), + (-34.5, -34), + (10.999, 10), + (-5.99, -5), + (4.0e10, i32::MIN), + (4.0e-10, 0), + (NAN, i32::MIN), + (2147483500.1, 2147483520), + ]; + for (i, &(xi, e)) in inputs.iter().enumerate() { + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvtt_ss2si(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvtt_ss2si({:?}) = {}, expected: {}", + i, x, r, e + ); + } + } + #[simd_test(enable = "sse")] const fn test_mm_cvtsi32_ss() { let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); @@ -3059,6 +3100,27 @@ mod tests { assert_eq_m128(e, r); } + #[simd_test(enable = "sse")] + fn test_mm_cvt_si2ss() { + let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + + let r = _mm_cvt_si2ss(a, 4555); + let e = _mm_setr_ps(4555.0, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + + let r = _mm_cvt_si2ss(a, 322223333); + let e = _mm_setr_ps(322223333.0, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + + let r = _mm_cvt_si2ss(a, -432); + let e = _mm_setr_ps(-432.0, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + + let r = _mm_cvt_si2ss(a, -322223333); + let e = _mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + } + #[simd_test(enable = "sse")] const fn test_mm_cvtss_f32() { let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0); @@ -3085,6 +3147,15 @@ mod tests { assert_eq!(get_m128(r2, 3), 4.25); } + #[simd_test(enable = "sse")] + const fn test_mm_set_ps1() { + let r = _mm_set_ps1(black_box(4.25)); + assert_eq!(get_m128(r, 0), 4.25); + assert_eq!(get_m128(r, 1), 4.25); + assert_eq!(get_m128(r, 2), 4.25); + assert_eq!(get_m128(r, 3), 4.25); + } + #[simd_test(enable = "sse")] const fn test_mm_set_ps() { let r = _mm_set_ps( From b397dacf633e07d925d6646042cbf54622608936 Mon Sep 17 00:00:00 2001 From: ArunTamil21 Date: Thu, 26 Feb 2026 16:18:30 +0000 Subject: [PATCH 2/4] Refactor alias tests using meta function pattern and add missing tests for _mm_undefined_ps, _mm_prefetch, _mm_load_ps1, _mm_store_ps1 --- crates/core_arch/src/x86/sse.rs | 216 ++++++++++------------- crates/stdarch-verify/tests/x86-intel.rs | 7 +- 2 files changed, 94 insertions(+), 129 deletions(-) diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 0ec842f9fc..4e9a3a3cb1 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -2997,40 +2997,28 @@ mod tests { } } - #[simd_test(enable = "sse")] - fn test_mm_cvtss_si32() { + fn test_mm_cvtss_si32_impl(f: fn(__m128) -> i32) { let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520]; for i in 0..inputs.len() { - let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); + let x = unsafe { _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0) }; let e = result[i]; - let r = _mm_cvtss_si32(x); - assert_eq!( - e, r, - "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", - i, x, r, e - ); + let r = f(x); + assert_eq!(e, r, "TestCase #{} f({:?}) = {}, expected: {}", i, x, r, e); } } #[simd_test(enable = "sse")] - fn test_mm_cvt_ss2si() { - let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; - let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520]; - for i in 0..inputs.len() { - let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); - let e = result[i]; - let r = _mm_cvt_ss2si(x); - assert_eq!( - e, r, - "TestCase #{} _mm_cvt_ss2si({:?}) = {}, expected: {}", - i, x, r, e - ); - } + fn test_mm_cvtss_si32() { + test_mm_cvtss_si32_impl(_mm_cvtss_si32); } #[simd_test(enable = "sse")] - fn test_mm_cvttss_si32() { + fn test_mm_cvt_ss2si() { + test_mm_cvtss_si32_impl(_mm_cvt_ss2si); + } + + fn test_cvttss_si32_impl(f: fn(__m128) -> i32) { let inputs = &[ (42.0f32, 42i32), (-31.4, -31), @@ -3044,81 +3032,48 @@ mod tests { (2147483500.1, 2147483520), ]; for (i, &(xi, e)) in inputs.iter().enumerate() { - let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); - let r = _mm_cvttss_si32(x); - assert_eq!( - e, r, - "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", - i, x, r, e - ); + let x = unsafe { _mm_setr_ps(xi, 1.0, 3.0, 4.0) }; + let r = f(x); + assert_eq!(e, r, "TestCase #{} f({:?}) = {}, expected: {}", i, x, r, e); } } #[simd_test(enable = "sse")] - fn test_mm_cvtt_ss2si() { - let inputs = &[ - (42.0f32, 42i32), - (-31.4, -31), - (-33.5, -33), - (-34.5, -34), - (10.999, 10), - (-5.99, -5), - (4.0e10, i32::MIN), - (4.0e-10, 0), - (NAN, i32::MIN), - (2147483500.1, 2147483520), - ]; - for (i, &(xi, e)) in inputs.iter().enumerate() { - let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); - let r = _mm_cvtt_ss2si(x); - assert_eq!( - e, r, - "TestCase #{} _mm_cvtt_ss2si({:?}) = {}, expected: {}", - i, x, r, e - ); - } + fn test_mm_cvttss_si32() { + test_cvttss_si32_impl(_mm_cvttss_si32); } #[simd_test(enable = "sse")] - const fn test_mm_cvtsi32_ss() { - let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + fn test_mm_cvtt_ss2si() { + test_cvttss_si32_impl(_mm_cvtt_ss2si) + } - let r = _mm_cvtsi32_ss(a, 4555); - let e = _mm_setr_ps(4555.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + fn test_mm_cvtsi32_ss_impl(f: fn(__m128, i32) -> __m128) { + unsafe { + let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); - let r = _mm_cvtsi32_ss(a, 322223333); - let e = _mm_setr_ps(322223333.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + let r = f(a, 4555); + assert_eq_m128(_mm_setr_ps(4555.0, 6.0, 7.0, 8.0), r); - let r = _mm_cvtsi32_ss(a, -432); - let e = _mm_setr_ps(-432.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + let r = f(a, 322223333); + assert_eq_m128(_mm_setr_ps(322223333.0, 6.0, 7.0, 8.0), r); - let r = _mm_cvtsi32_ss(a, -322223333); - let e = _mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + let r = f(a, -432); + assert_eq_m128(_mm_setr_ps(-432.0, 6.0, 7.0, 8.0), r); + + let r = f(a, -322223333); + assert_eq_m128(_mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0), r); + } } #[simd_test(enable = "sse")] - fn test_mm_cvt_si2ss() { - let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); - - let r = _mm_cvt_si2ss(a, 4555); - let e = _mm_setr_ps(4555.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); - - let r = _mm_cvt_si2ss(a, 322223333); - let e = _mm_setr_ps(322223333.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); - - let r = _mm_cvt_si2ss(a, -432); - let e = _mm_setr_ps(-432.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + fn test_mm_cvtsi32_ss() { + test_mm_cvtsi32_ss_impl(_mm_cvtsi32_ss); + } - let r = _mm_cvt_si2ss(a, -322223333); - let e = _mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0); - assert_eq_m128(e, r); + #[simd_test(enable = "sse")] + fn test_mm_cvt_si2ss() { + test_mm_cvtsi32_ss_impl(_mm_cvt_si2ss); } #[simd_test(enable = "sse")] @@ -3133,27 +3088,25 @@ mod tests { assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0)); } + fn test_mm_set1_ps_impl(f: fn(f32) -> __m128) { + unsafe { + let r = f(black_box(4.25)); + assert_eq!(get_m128(r, 0), 4.25); + assert_eq!(get_m128(r, 1), 4.25); + assert_eq!(get_m128(r, 2), 4.25); + assert_eq!(get_m128(r, 3), 4.25); + } + } + #[simd_test(enable = "sse")] - const fn test_mm_set1_ps() { - let r1 = _mm_set1_ps(black_box(4.25)); - let r2 = _mm_set_ps1(black_box(4.25)); - assert_eq!(get_m128(r1, 0), 4.25); - assert_eq!(get_m128(r1, 1), 4.25); - assert_eq!(get_m128(r1, 2), 4.25); - assert_eq!(get_m128(r1, 3), 4.25); - assert_eq!(get_m128(r2, 0), 4.25); - assert_eq!(get_m128(r2, 1), 4.25); - assert_eq!(get_m128(r2, 2), 4.25); - assert_eq!(get_m128(r2, 3), 4.25); + fn test_mm_set1_ps() { + test_mm_set1_ps_impl(_mm_set1_ps); + test_mm_set1_ps_impl(_mm_set_ps1); } #[simd_test(enable = "sse")] - const fn test_mm_set_ps1() { - let r = _mm_set_ps1(black_box(4.25)); - assert_eq!(get_m128(r, 0), 4.25); - assert_eq!(get_m128(r, 1), 4.25); - assert_eq!(get_m128(r, 2), 4.25); - assert_eq!(get_m128(r, 3), 4.25); + fn test_mm_set_ps1() { + test_mm_set1_ps_impl(_mm_set_ps1); } #[simd_test(enable = "sse")] @@ -3242,11 +3195,20 @@ mod tests { assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0)); } - #[simd_test(enable = "sse")] - const fn test_mm_load1_ps() { + fn test_mm_load1_ps_impl(f: unsafe fn(*const f32) -> __m128) { let a = 42.0f32; - let r = unsafe { _mm_load1_ps(ptr::addr_of!(a)) }; - assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0)); + let r = unsafe { f(ptr::addr_of!(a)) }; + unsafe { assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0)) }; + } + + #[simd_test(enable = "sse")] + fn test_mm_load1_ps() { + test_mm_load1_ps_impl(_mm_load1_ps); + } + + #[simd_test(enable = "sse")] + fn test_mm_load_ps1() { + test_mm_load1_ps_impl(_mm_load_ps1); } #[simd_test(enable = "sse")] @@ -3298,34 +3260,24 @@ mod tests { assert_eq!(vals[2], 0.0); } - #[simd_test(enable = "sse")] - const fn test_mm_store1_ps() { + fn test_mm_store1_ps_impl(f: unsafe fn(*mut f32, __m128)) { let mut vals = Memory { data: [0.0f32; 4] }; - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - - // guaranteed to be aligned to 16 bytes + let a = unsafe { _mm_setr_ps(1.0, 2.0, 3.0, 4.0) }; let p = vals.data.as_mut_ptr(); - unsafe { - _mm_store1_ps(p, *black_box(&a)); + f(p, *black_box(&a)); } - assert_eq!(vals.data, [1.0, 1.0, 1.0, 1.0]); } #[simd_test(enable = "sse")] - const fn test_mm_store_ps() { - let mut vals = Memory { data: [0.0f32; 4] }; - let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - - // guaranteed to be aligned to 16 bytes - let p = vals.data.as_mut_ptr(); - - unsafe { - _mm_store_ps(p, *black_box(&a)); - } + fn test_mm_store1_ps() { + test_mm_store1_ps_impl(_mm_store1_ps); + } - assert_eq!(vals.data, [1.0, 2.0, 3.0, 4.0]); + #[simd_test(enable = "sse")] + fn test_mm_store_ps1() { + test_mm_store1_ps_impl(_mm_store_ps1); } #[simd_test(enable = "sse")] @@ -3364,6 +3316,24 @@ mod tests { assert_eq!(vals.data, [0.0, 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0]); } + #[simd_test(enable = "sse")] + fn test_mm_undefined_ps() { + // _mm_undefined_ps returns a vector with indeterminate elements, + // so we can only verify it doesn't crash. + let _r = _mm_undefined_ps(); + } + + #[simd_test(enable = "sse")] + fn test_mm_prefetch() { + // Prefetch only affects cache behavior, not program correctness, + // so we can only verify it doesn't crash for each hint strategy. + let data = 42.0f32; + _mm_prefetch::<_MM_HINT_T0>(ptr::addr_of!(data) as *const i8); + _mm_prefetch::<_MM_HINT_T1>(ptr::addr_of!(data) as *const i8); + _mm_prefetch::<_MM_HINT_T2>(ptr::addr_of!(data) as *const i8); + _mm_prefetch::<_MM_HINT_NTA>(ptr::addr_of!(data) as *const i8); + } + #[simd_test(enable = "sse")] const fn test_mm_move_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs index 2ac05e28cb..85f718038e 100644 --- a/crates/stdarch-verify/tests/x86-intel.rs +++ b/crates/stdarch-verify/tests/x86-intel.rs @@ -209,12 +209,9 @@ fn verify_all_signatures() { "_rdseed16_step", "_rdseed32_step", "_rdseed64_step", - // Prefetch - "_mm_prefetch", // CMPXCHG "cmpxchg16b", - // Undefined - "_mm_undefined_ps", + // Undefined, "_mm_undefined_pd", "_mm_undefined_si128", "_mm_undefined_ph", @@ -250,8 +247,6 @@ fn verify_all_signatures() { "_mm_cvtt_ss2si", "_mm_cvt_si2ss", "_mm_set_ps1", - "_mm_load_ps1", - "_mm_store_ps1", "_mm_bslli_si128", "_mm_bsrli_si128", "_bextr2_u32", From b5b01c0ff183fa2698c5d01f8fb507e69e95b140 Mon Sep 17 00:00:00 2001 From: ArunTamil21 Date: Thu, 26 Feb 2026 16:27:41 +0000 Subject: [PATCH 3/4] Restore deleted test_mm_store_ps --- crates/core_arch/src/x86/sse.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 4e9a3a3cb1..6a857f22b0 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -3275,6 +3275,21 @@ mod tests { test_mm_store1_ps_impl(_mm_store1_ps); } + #[simd_test(enable = "sse")] + const fn test_mm_store_ps() { + let mut vals = Memory { data: [0.0f32; 4] }; + let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); + + // guaranteed to be aligned to 16 bytes + let p = vals.data.as_mut_ptr(); + + unsafe { + _mm_store_ps(p, *black_box(&a)); + } + + assert_eq!(vals.data, [1.0, 2.0, 3.0, 4.0]); + } + #[simd_test(enable = "sse")] fn test_mm_store_ps1() { test_mm_store1_ps_impl(_mm_store_ps1); From 1a3e7983ab4ee2ca3ccce09409dfc9ce3f000928 Mon Sep 17 00:00:00 2001 From: ArunTamil21 Date: Fri, 27 Feb 2026 18:28:23 +0000 Subject: [PATCH 4/4] Remove redundant tests for _mm_prefetch and _mm_undefined_ps Already verified by assert_instr; no output to assert at runtime. --- crates/core_arch/src/x86/sse.rs | 18 ------------------ crates/stdarch-verify/tests/x86-intel.rs | 3 +++ 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 6a857f22b0..95666b5c3d 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -3331,24 +3331,6 @@ mod tests { assert_eq!(vals.data, [0.0, 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0]); } - #[simd_test(enable = "sse")] - fn test_mm_undefined_ps() { - // _mm_undefined_ps returns a vector with indeterminate elements, - // so we can only verify it doesn't crash. - let _r = _mm_undefined_ps(); - } - - #[simd_test(enable = "sse")] - fn test_mm_prefetch() { - // Prefetch only affects cache behavior, not program correctness, - // so we can only verify it doesn't crash for each hint strategy. - let data = 42.0f32; - _mm_prefetch::<_MM_HINT_T0>(ptr::addr_of!(data) as *const i8); - _mm_prefetch::<_MM_HINT_T1>(ptr::addr_of!(data) as *const i8); - _mm_prefetch::<_MM_HINT_T2>(ptr::addr_of!(data) as *const i8); - _mm_prefetch::<_MM_HINT_NTA>(ptr::addr_of!(data) as *const i8); - } - #[simd_test(enable = "sse")] const fn test_mm_move_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs index 85f718038e..0ee32c826b 100644 --- a/crates/stdarch-verify/tests/x86-intel.rs +++ b/crates/stdarch-verify/tests/x86-intel.rs @@ -211,7 +211,10 @@ fn verify_all_signatures() { "_rdseed64_step", // CMPXCHG "cmpxchg16b", + //PREFETCH + "_mm_prefetch", // Undefined, + "_mm_undefined_ps", "_mm_undefined_pd", "_mm_undefined_si128", "_mm_undefined_ph",