Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 92 additions & 54 deletions crates/core_arch/src/x86/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2997,24 +2997,28 @@ mod tests {
}
}

#[simd_test(enable = "sse")]
fn test_mm_cvtss_si32() {
fn test_mm_cvtss_si32_impl(f: fn(__m128) -> i32) {
let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
for i in 0..inputs.len() {
let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
let x = unsafe { _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0) };
let e = result[i];
let r = _mm_cvtss_si32(x);
assert_eq!(
e, r,
"TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
i, x, r, e
);
let r = f(x);
assert_eq!(e, r, "TestCase #{} f({:?}) = {}, expected: {}", i, x, r, e);
}
}

#[simd_test(enable = "sse")]
fn test_mm_cvttss_si32() {
// Runs the input/expected tables of `test_mm_cvtss_si32_impl` with
// `_mm_cvtss_si32` as the function under test.
fn test_mm_cvtss_si32() {
test_mm_cvtss_si32_impl(_mm_cvtss_si32);
}

// Runs the input/expected tables of `test_mm_cvtss_si32_impl` with
// `_mm_cvt_ss2si` as the function under test, so it must produce the same
// results as the cases listed there.
#[simd_test(enable = "sse")]
fn test_mm_cvt_ss2si() {
test_mm_cvtss_si32_impl(_mm_cvt_ss2si);
}

fn test_cvttss_si32_impl(f: fn(__m128) -> i32) {
let inputs = &[
(42.0f32, 42i32),
(-31.4, -31),
Expand All @@ -3028,35 +3032,48 @@ mod tests {
(2147483500.1, 2147483520),
];
for (i, &(xi, e)) in inputs.iter().enumerate() {
let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
let r = _mm_cvttss_si32(x);
assert_eq!(
e, r,
"TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
i, x, r, e
);
let x = unsafe { _mm_setr_ps(xi, 1.0, 3.0, 4.0) };
let r = f(x);
assert_eq!(e, r, "TestCase #{} f({:?}) = {}, expected: {}", i, x, r, e);
}
}

#[simd_test(enable = "sse")]
const fn test_mm_cvtsi32_ss() {
let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
// Runs the (input, expected) pairs of `test_cvttss_si32_impl` with
// `_mm_cvttss_si32` as the function under test.
fn test_mm_cvttss_si32() {
test_cvttss_si32_impl(_mm_cvttss_si32);
}

let r = _mm_cvtsi32_ss(a, 4555);
let e = _mm_setr_ps(4555.0, 6.0, 7.0, 8.0);
assert_eq_m128(e, r);
// Runs the (input, expected) pairs of `test_cvttss_si32_impl` with
// `_mm_cvtt_ss2si` as the function under test.
#[simd_test(enable = "sse")]
fn test_mm_cvtt_ss2si() {
    // Terminated with `;` like every sibling wrapper: the helper returns
    // nothing, so the call is a statement rather than a tail expression.
    test_cvttss_si32_impl(_mm_cvtt_ss2si);
}

let r = _mm_cvtsi32_ss(a, 322223333);
let e = _mm_setr_ps(322223333.0, 6.0, 7.0, 8.0);
assert_eq_m128(e, r);
fn test_mm_cvtsi32_ss_impl(f: fn(__m128, i32) -> __m128) {
unsafe {
let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);

let r = _mm_cvtsi32_ss(a, -432);
let e = _mm_setr_ps(-432.0, 6.0, 7.0, 8.0);
assert_eq_m128(e, r);
let r = f(a, 4555);
assert_eq_m128(_mm_setr_ps(4555.0, 6.0, 7.0, 8.0), r);

let r = _mm_cvtsi32_ss(a, -322223333);
let e = _mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0);
assert_eq_m128(e, r);
let r = f(a, 322223333);
assert_eq_m128(_mm_setr_ps(322223333.0, 6.0, 7.0, 8.0), r);

let r = f(a, -432);
assert_eq_m128(_mm_setr_ps(-432.0, 6.0, 7.0, 8.0), r);

let r = f(a, -322223333);
assert_eq_m128(_mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0), r);
}
}

// Runs `test_mm_cvtsi32_ss_impl` with `_mm_cvtsi32_ss` as the function under
// test: for each integer argument the helper expects lane 0 to hold that
// value as a float while lanes 1..3 keep 6.0, 7.0 and 8.0.
#[simd_test(enable = "sse")]
fn test_mm_cvtsi32_ss() {
test_mm_cvtsi32_ss_impl(_mm_cvtsi32_ss);
}

// Runs `test_mm_cvtsi32_ss_impl` with `_mm_cvt_si2ss` as the function under
// test, so it must pass the same lane-0 conversion cases as that helper lists.
#[simd_test(enable = "sse")]
fn test_mm_cvt_si2ss() {
test_mm_cvtsi32_ss_impl(_mm_cvt_si2ss);
}

#[simd_test(enable = "sse")]
Expand All @@ -3071,18 +3088,25 @@ mod tests {
assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
}

// Shared check for intrinsics of shape `fn(f32) -> __m128`: `f` is called
// with 4.25 (passed through `black_box`) and each of the four lanes of the
// result must read back as exactly 4.25.
fn test_mm_set1_ps_impl(f: fn(f32) -> __m128) {
    let expected = 4.25f32;
    unsafe {
        let splat = f(black_box(expected));
        // Lanes 0 through 3 are checked in ascending order.
        for lane in 0..4 {
            assert_eq!(get_m128(splat, lane), expected);
        }
    }
}

// Checks `_mm_set1_ps` through the shared `test_mm_set1_ps_impl` helper.
//
// `_mm_set_ps1` is intentionally not exercised here: it has its own
// `test_mm_set_ps1` test, so a regression in either intrinsic is reported
// under the test that carries its name.
#[simd_test(enable = "sse")]
fn test_mm_set1_ps() {
    test_mm_set1_ps_impl(_mm_set1_ps);
}

#[simd_test(enable = "sse")]
const fn test_mm_set1_ps() {
let r1 = _mm_set1_ps(black_box(4.25));
let r2 = _mm_set_ps1(black_box(4.25));
assert_eq!(get_m128(r1, 0), 4.25);
assert_eq!(get_m128(r1, 1), 4.25);
assert_eq!(get_m128(r1, 2), 4.25);
assert_eq!(get_m128(r1, 3), 4.25);
assert_eq!(get_m128(r2, 0), 4.25);
assert_eq!(get_m128(r2, 1), 4.25);
assert_eq!(get_m128(r2, 2), 4.25);
assert_eq!(get_m128(r2, 3), 4.25);
// Runs `test_mm_set1_ps_impl` with `_mm_set_ps1` as the function under test:
// the helper expects all four lanes of the result to equal the 4.25 input.
fn test_mm_set_ps1() {
test_mm_set1_ps_impl(_mm_set_ps1);
}

#[simd_test(enable = "sse")]
Expand Down Expand Up @@ -3171,11 +3195,20 @@ mod tests {
assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
}

#[simd_test(enable = "sse")]
const fn test_mm_load1_ps() {
fn test_mm_load1_ps_impl(f: unsafe fn(*const f32) -> __m128) {
let a = 42.0f32;
let r = unsafe { _mm_load1_ps(ptr::addr_of!(a)) };
assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
let r = unsafe { f(ptr::addr_of!(a)) };
unsafe { assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0)) };
}

// Runs `test_mm_load1_ps_impl` with `_mm_load1_ps` as the function under
// test: the helper passes a pointer to a single 42.0 and expects 42.0 in all
// four lanes of the result.
#[simd_test(enable = "sse")]
fn test_mm_load1_ps() {
test_mm_load1_ps_impl(_mm_load1_ps);
}

// Runs `test_mm_load1_ps_impl` with `_mm_load_ps1` as the function under
// test, so it must also yield 42.0 in all four lanes from a pointer to one
// 42.0 value.
#[simd_test(enable = "sse")]
fn test_mm_load_ps1() {
test_mm_load1_ps_impl(_mm_load_ps1);
}

#[simd_test(enable = "sse")]
Expand Down Expand Up @@ -3227,21 +3260,21 @@ mod tests {
assert_eq!(vals[2], 0.0);
}

#[simd_test(enable = "sse")]
const fn test_mm_store1_ps() {
fn test_mm_store1_ps_impl(f: unsafe fn(*mut f32, __m128)) {
let mut vals = Memory { data: [0.0f32; 4] };
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

// guaranteed to be aligned to 16 bytes
let a = unsafe { _mm_setr_ps(1.0, 2.0, 3.0, 4.0) };
let p = vals.data.as_mut_ptr();

unsafe {
_mm_store1_ps(p, *black_box(&a));
f(p, *black_box(&a));
}

assert_eq!(vals.data, [1.0, 1.0, 1.0, 1.0]);
}

// Runs `test_mm_store1_ps_impl` with `_mm_store1_ps` as the function under
// test: the helper stores the vector (1.0, 2.0, 3.0, 4.0) through `f` and
// expects the destination to read [1.0, 1.0, 1.0, 1.0] afterwards.
#[simd_test(enable = "sse")]
fn test_mm_store1_ps() {
test_mm_store1_ps_impl(_mm_store1_ps);
}

#[simd_test(enable = "sse")]
const fn test_mm_store_ps() {
let mut vals = Memory { data: [0.0f32; 4] };
Expand All @@ -3257,6 +3290,11 @@ mod tests {
assert_eq!(vals.data, [1.0, 2.0, 3.0, 4.0]);
}

// Runs `test_mm_store1_ps_impl` with `_mm_store_ps1` as the function under
// test, so storing (1.0, 2.0, 3.0, 4.0) must also leave
// [1.0, 1.0, 1.0, 1.0] in the destination.
#[simd_test(enable = "sse")]
fn test_mm_store_ps1() {
test_mm_store1_ps_impl(_mm_store_ps1);
}

#[simd_test(enable = "sse")]
const fn test_mm_storer_ps() {
let mut vals = Memory { data: [0.0f32; 4] };
Expand Down
8 changes: 3 additions & 5 deletions crates/stdarch-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ fn verify_all_signatures() {
"_rdseed16_step",
"_rdseed32_step",
"_rdseed64_step",
// Prefetch
"_mm_prefetch",
// CMPXCHG
"cmpxchg16b",
// Undefined
// Prefetch
"_mm_prefetch",
// Undefined
"_mm_undefined_ps",
"_mm_undefined_pd",
"_mm_undefined_si128",
Expand Down Expand Up @@ -250,8 +250,6 @@ fn verify_all_signatures() {
"_mm_cvtt_ss2si",
"_mm_cvt_si2ss",
"_mm_set_ps1",
"_mm_load_ps1",
"_mm_store_ps1",
"_mm_bslli_si128",
"_mm_bsrli_si128",
"_bextr2_u32",
Expand Down