From 12ffc408e225080d92e9bbad8962cd298d5e7746 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 7 Apr 2026 13:06:39 -0700 Subject: [PATCH] x64: Fix typo in `splat` lowering An `Xmm` was explicitly created but never used. This had little practical consequence because a rule with `sinkable_load` had higher precedence, but the fix does affect a few tests which splat constants, making their generated code slightly more efficient. --- cranelift/codegen/src/isa/x64/lower.isle | 2 +- .../isa/aarch64/nan-canonicalization.clif | 14 ++------------ .../isa/x64/nan-canonicalization-sse41.clif | 14 ++------------ .../filetests/isa/x64/nan-canonicalization.clif | 11 ++--------- 4 files changed, 7 insertions(+), 34 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 9819ef8c8742..ac904f69449b 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -4876,7 +4876,7 @@ ;; on AVX2 to leverage that specific instruction for this operation. 
(rule 0 (lower (has_type $F32X4 (splat _ src))) (let ((tmp Xmm src)) - (x64_shufps src src 0))) + (x64_shufps tmp tmp 0))) (rule 1 (lower (has_type $F32X4 (splat _ src))) (if-let true (use_avx2)) (x64_vbroadcastss src)) diff --git a/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif b/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif index 619b245e1ad3..af76af4e75ad 100644 --- a/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif +++ b/cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif @@ -15,7 +15,7 @@ block0(v0: f32x4, v1: f32x4): ; addps %xmm1, %xmm0 ; movl $0x7fc00000, %edi ; movd %edi, %xmm7 -; shufps $0x0, (%rip), %xmm7 +; shufps $0x0, %xmm7, %xmm7 ; movdqa %xmm0, %xmm1 ; cmpunordps %xmm0, %xmm1 ; movdqa %xmm0, %xmm2 @@ -35,7 +35,7 @@ block0(v0: f32x4, v1: f32x4): ; addps %xmm1, %xmm0 ; movl $0x7fc00000, %edi ; movd %edi, %xmm7 -; shufps $0, 0x28(%rip), %xmm7 +; shufps $0, %xmm7, %xmm7 ; movdqa %xmm0, %xmm1 ; cmpunordps %xmm0, %xmm1 ; movdqa %xmm0, %xmm2 @@ -46,16 +46,6 @@ block0(v0: f32x4, v1: f32x4): ; movq %rbp, %rsp ; popq %rbp ; retq -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; sarb $0, (%rdi) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) function %f1(f64, f64) -> f64 { block0(v0: f64, v1: f64): diff --git a/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif b/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif index 830008b45e99..98fee9677b9b 100644 --- a/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif +++ b/cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif @@ -15,7 +15,7 @@ block0(v0: f32x4, v1: f32x4): ; addps %xmm1, %xmm0 ; movl $0x7fc00000, %edi ; movd %edi, %xmm7 -; shufps $0x0, (%rip), %xmm7 +; shufps $0x0, %xmm7, %xmm7 ; movdqa %xmm0, %xmm1 ; cmpunordps %xmm0, %xmm1 ; movdqa %xmm0, %xmm2 @@ -35,7 +35,7 @@ 
block0(v0: f32x4, v1: f32x4): ; addps %xmm1, %xmm0 ; movl $0x7fc00000, %edi ; movd %edi, %xmm7 -; shufps $0, 0x28(%rip), %xmm7 +; shufps $0, %xmm7, %xmm7 ; movdqa %xmm0, %xmm1 ; cmpunordps %xmm0, %xmm1 ; movdqa %xmm0, %xmm2 @@ -46,16 +46,6 @@ block0(v0: f32x4, v1: f32x4): ; movq %rbp, %rsp ; popq %rbp ; retq -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; sarb $0, (%rdi) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) function %f1(f64, f64) -> f64 { block0(v0: f64, v1: f64): diff --git a/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif b/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif index a9f1cd3735d9..d54c978aeac9 100644 --- a/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif +++ b/cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif @@ -16,7 +16,7 @@ block0(v0: f32x4, v1: f32x4): ; movdqa %xmm0, %xmm1 ; movl $0x7fc00000, %r9d ; movd %r9d, %xmm5 -; shufps $0x0, (%rip), %xmm5 +; shufps $0x0, %xmm5, %xmm5 ; cmpunordps %xmm1, %xmm0 ; andps %xmm0, %xmm5 ; andnps %xmm1, %xmm0 @@ -34,7 +34,7 @@ block0(v0: f32x4, v1: f32x4): ; movdqa %xmm0, %xmm1 ; movl $0x7fc00000, %r9d ; movd %r9d, %xmm5 -; shufps $0, 0x12(%rip), %xmm5 +; shufps $0, %xmm5, %xmm5 ; cmpunordps %xmm1, %xmm0 ; andps %xmm0, %xmm5 ; andnps %xmm1, %xmm0 @@ -42,13 +42,6 @@ block0(v0: f32x4, v1: f32x4): ; movq %rbp, %rsp ; popq %rbp ; retq -; addb %al, (%rax) -; sarb $0, (%rdi) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) -; addb %al, (%rax) function %f1(f64, f64) -> f64 { block0(v0: f64, v1: f64):