diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle
index 04537641d267..6b2779bb0db4 100644
--- a/cranelift/codegen/src/isa/s390x/inst.isle
+++ b/cranelift/codegen/src/isa/s390x/inst.isle
@@ -1008,6 +1008,22 @@
       (rd WritableReg)
       (mem MemArg))
 
+    ;; Load indexed address: `rd` = `base` + (sext32(`index` + `offset`) << `size`).
+    (LoadIndexedAddr
+      (rd WritableReg)
+      (base Reg)
+      (index Reg)
+      (offset SImm20)
+      (size u8))
+
+    ;; Load logical indexed address: `rd` = `base` + (zext32(`index` + `offset`) << `size`).
+    (LoadLogicalIndexedAddr
+      (rd WritableReg)
+      (base Reg)
+      (index Reg)
+      (offset SImm20)
+      (size u8))
+
     ;; Meta-instruction to emit a loop around a sequence of instructions.
     ;; This control flow is not visible to the compiler core, in particular
     ;; the register allocator.  Therefore, instructions in the loop may not
@@ -1741,6 +1757,9 @@
 (decl u32_from_value (u32) Value)
 (extern extractor u32_from_value u32_from_value)
 
+(decl u16_from_value (u16) Value)
+(extern extractor u16_from_value u16_from_value)
+
 (decl u8_from_value (u8) Value)
 (extern extractor u8_from_value u8_from_value)
 
@@ -1774,6 +1793,13 @@
 (decl uimm16shifted_from_value (UImm16Shifted) Value)
 (extern extractor uimm16shifted_from_value uimm16shifted_from_value)
 
+(decl simm20_from_value (SImm20) Value)
+(extern extractor simm20_from_value simm20_from_value)
+
+;; Constant shift amount that LXA/LLXA can encode, i.e. 0 through 4.
+(decl lxa_shift_from_value (u8) Value)
+(extern extractor lxa_shift_from_value lxa_shift_from_value)
+
 (decl uimm32shifted_from_value (UImm32Shifted) Value)
 (extern extractor uimm32shifted_from_value uimm32shifted_from_value)
 
@@ -1927,6 +1953,26 @@
   (if-let final_offset (memarg_symbol_offset_sum offset sym_offset))
   (memarg_symbol name final_offset flags))
 
+;; Fold `base + (ext32(index + imm20) << shift)` into a single LXA/LLXA.  The
+;; extended 32-bit sum is always the index operand and the 64-bit addend the
+;; base operand, whichever side of the outer `iadd` each one appears on.
+(rule 2 (lower_address flags (has_type (mie4_enabled)
+          (iadd $I64 (ishl $I64 (uextend $I64 (iadd $I32 x (simm20_from_value z)))
+          (lxa_shift_from_value shift)) y)) (i64_from_offset offset))
+        (memarg_reg_plus_off (load_logical_indexed_addr y x z shift) offset 0 flags))
+
+(rule 3 (lower_address flags (has_type (mie4_enabled)
+          (iadd $I64 y (ishl $I64 (uextend $I64 (iadd $I32 x (simm20_from_value z)))
+          (lxa_shift_from_value shift)))) (i64_from_offset offset))
+        (memarg_reg_plus_off (load_logical_indexed_addr y x z shift) offset 0 flags))
+
+(rule 4 (lower_address flags (has_type (mie4_enabled)
+          (iadd $I64 (ishl $I64 (sextend $I64 (iadd $I32 x (simm20_from_value z))) (lxa_shift_from_value shift)) y)) (i64_from_offset offset))
+        (memarg_reg_plus_off (load_indexed_addr y x z shift) offset 0 flags))
+
+(rule 5 (lower_address flags (has_type (mie4_enabled)
+          (iadd $I64 y (ishl $I64 (sextend $I64 (iadd $I32 x (simm20_from_value z))) (lxa_shift_from_value shift)))) (i64_from_offset offset))
+        (memarg_reg_plus_off (load_indexed_addr y x z shift) offset 0 flags))
 
 ;; Lower an address plus a small bias into a `MemArg`.
 
@@ -2817,6 +2863,20 @@
             (_ Unit (emit (MInst.LoadAddr dst mem))))
         dst))
 
+;; Helper for emitting `MInst.LoadIndexedAddr` instructions.
+(decl load_indexed_addr (Reg Reg SImm20 u8) Reg)
+(rule (load_indexed_addr base index offset size)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.LoadIndexedAddr dst base index offset size))))
+        dst))
+
+;; Helper for emitting `MInst.LoadLogicalIndexedAddr` instructions.
+(decl load_logical_indexed_addr (Reg Reg SImm20 u8) Reg)
+(rule (load_logical_indexed_addr base index offset size)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.LoadLogicalIndexedAddr dst base index offset size))))
+        dst))
+
 ;; Helper for emitting `MInst.Call` instructions.
 (decl call_impl (WritableReg BoxCallInfo) SideEffectNoResult)
 (rule (call_impl reg info)
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs
index 2222faeff1b5..9132c12a69f6 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs
@@ -2321,6 +2321,36 @@ impl Inst {
                     rd, &mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state,
                 );
             }
+            &Inst::LoadIndexedAddr {
+                rd,
+                base,
+                index,
+                offset,
+                size,
+            } => {
+                // LXAB is 0xe360; each shift step (B, H, F, G, Q) adds 2 to the opcode.
+                debug_assert!(size <= 4, "LXA shift amount out of range: {size}");
+                let opcode: u16 = 0xe360 | (size as u16 & 0xf) << 1;
+                put(
+                    sink,
+                    &enc_rxy(opcode, rd.to_reg(), base, index, offset.bits()),
+                );
+            }
+            &Inst::LoadLogicalIndexedAddr {
+                rd,
+                base,
+                index,
+                offset,
+                size,
+            } => {
+                // LLXAB is 0xe361; each shift step (B, H, F, G, Q) adds 2 to the opcode.
+                debug_assert!(size <= 4, "LLXA shift amount out of range: {size}");
+                let opcode: u16 = 0xe361 | (size as u16 & 0xf) << 1;
+                put(
+                    sink,
+                    &enc_rxy(opcode, rd.to_reg(), base, index, offset.bits()),
+                );
+            }
 
             &Inst::Mov64 { rd, rm } => {
                 let opcode = 0xb904; // LGR
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
index 701b50feef03..5db0629932bd 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
@@ -13929,6 +13929,72 @@ fn test_s390x_binemit() {
         "E7480001384D",
         "vrepg %v20, %v8, 1",
     ));
+    insns.push((
+        Inst::LoadIndexedAddr {
+            rd: writable_gpr(1),
+            base: gpr(2),
+            index: gpr(3),
+            offset: SImm20::maybe_from_i64(0x7ffff).unwrap(),
+            size: 3,
+        },
+        "E3132FFF7F66",
+        "lxag %r1, 524287(%r3,%r2)",
+    ));
+    insns.push((
+        Inst::LoadIndexedAddr {
+            rd: writable_gpr(1),
+            base: gpr(2),
+            index: gpr(3),
+            offset: SImm20::maybe_from_i64(-2).unwrap(),
+            size: 4,
+        },
+        "E3132FFEFF68",
+        "lxaq %r1, -2(%r3,%r2)",
+    ));
+    insns.push((
+        Inst::LoadLogicalIndexedAddr {
+            rd: writable_gpr(1),
+            base: gpr(2),
+            index: gpr(3),
+            offset: SImm20::maybe_from_i64(0x7ffff).unwrap(),
+            size: 2,
+        },
+        "E3132FFF7F65",
+        "llxaf %r1, 524287(%r3,%r2)",
+    ));
+    insns.push((
+        Inst::LoadLogicalIndexedAddr {
+            rd: writable_gpr(1),
+            base: gpr(2),
+            index: gpr(3),
+            offset: SImm20::maybe_from_i64(-2).unwrap(),
+            size: 1,
+        },
+        "E3132FFEFF63",
+        "llxah %r1, -2(%r3,%r2)",
+    ));
+    insns.push((
+        Inst::LoadIndexedAddr {
+            rd: writable_gpr(1),
+            base: gpr(2),
+            index: gpr(3),
+            offset: SImm20::maybe_from_i64(0x7ffff).unwrap(),
+            size: 0,
+        },
+        "E3132FFF7F60",
+        "lxab %r1, 524287(%r3,%r2)",
+    ));
+    insns.push((
+        Inst::LoadLogicalIndexedAddr {
+            rd: writable_gpr(1),
+            base: gpr(2),
+            index: gpr(3),
+            offset: SImm20::maybe_from_i64(-2).unwrap(),
+            size: 0,
+        },
+        "E3132FFEFF61",
+        "llxab %r1, -2(%r3,%r2)",
+    ));
 
     let flags = settings::Flags::new(settings::builder());
 
diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs
index 421cfde58fef..bf3a0a6dcfd6 100644
--- a/cranelift/codegen/src/isa/s390x/inst/mod.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs
@@ -1,7 +1,7 @@
 //! This module defines s390x-specific machine instruction types.
 
 use crate::binemit::{Addend, CodeOffset, Reloc};
-use crate::ir::{ExternalName, Type, types};
+use crate::ir::{ExternalName, MemFlags, Type, types};
 use crate::isa::s390x::abi::S390xMachineDeps;
 use crate::isa::{CallConv, FunctionAlignment};
 use crate::machinst::*;
@@ -240,6 +240,10 @@ impl Inst {
             | Inst::Unwind { .. }
             | Inst::ElfTlsGetOffset { .. } => InstructionSet::Base,
 
+            Inst::LoadIndexedAddr { .. } | Inst::LoadLogicalIndexedAddr { .. } => {
+                InstructionSet::MIE4
+            }
+
             // These depend on the opcode
             Inst::AluRRR { alu_op, .. } => match alu_op {
                 ALUOp::NotAnd32 | ALUOp::NotAnd64 => InstructionSet::MIE3,
@@ -1033,3 +1037,17 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor
+        Inst::LoadIndexedAddr {
+            rd, base, index, ..
+        } => {
+            collector.reg_def(rd);
+            collector.reg_use(base);
+            collector.reg_use(index);
+        }
+        Inst::LoadLogicalIndexedAddr {
+            rd, base, index, ..
+        } => {
+            collector.reg_def(rd);
+            collector.reg_use(base);
+            collector.reg_use(index);
+        }
         Inst::StackProbeLoop { probe_count, .. } => {
             collector.reg_early_def(probe_count);
         }
@@ -3507,6 +3525,58 @@ impl Inst {
 
                 format!("{mem_str}{op} {rd}, {mem}")
             }
+            &Inst::LoadIndexedAddr {
+                rd,
+                base,
+                index,
+                offset,
+                size,
+            } => {
+                let rd = pretty_print_reg(rd.to_reg());
+                let op = match size {
+                    0 => "lxab",
+                    1 => "lxah",
+                    2 => "lxaf",
+                    3 => "lxag",
+                    4 => "lxaq",
+                    _ => unreachable!(),
+                };
+                let flags = MemFlags::trusted();
+                let mem = MemArg::BXD20 {
+                    base,
+                    index,
+                    disp: offset,
+                    flags,
+                };
+                let mem = mem.pretty_print_default();
+                format!("{op} {rd}, {mem}")
+            }
+            &Inst::LoadLogicalIndexedAddr {
+                rd,
+                base,
+                index,
+                offset,
+                size,
+            } => {
+                let rd = pretty_print_reg(rd.to_reg());
+                let op = match size {
+                    0 => "llxab",
+                    1 => "llxah",
+                    2 => "llxaf",
+                    3 => "llxag",
+                    4 => "llxaq",
+                    _ => unreachable!(),
+                };
+                let flags = MemFlags::trusted();
+                let mem = MemArg::BXD20 {
+                    base,
+                    index,
+                    disp: offset,
+                    flags,
+                };
+                let mem = mem.pretty_print_default();
+                format!("{op} {rd}, {mem}")
+            }
             &Inst::StackProbeLoop {
                 probe_count,
                 guard_size,
diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
index a88e75166aca..500636cc472b 100644
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -115,6 +115,23 @@
 (rule 1 (lower (has_type (vr128_ty ty) (iadd _ x y)))
       (vec_add ty x y))
 
+;; Fold `base + (ext32(index + imm20) << shift)` into LXA (sign-extending) or
+;; LLXA (zero-extending).  The extended 32-bit sum is always the index operand.
+(rule 16 (lower (has_type (mie4_enabled)
+      (iadd $I64 (ishl $I64 (uextend $I64 (iadd $I32 x (simm20_from_value z))) (lxa_shift_from_value shift)) y)))
+      (load_logical_indexed_addr y x z shift))
+
+(rule 17 (lower (has_type (mie4_enabled)
+      (iadd $I64 y (ishl $I64 (uextend $I64 (iadd $I32 x (simm20_from_value z))) (lxa_shift_from_value shift)))))
+      (load_logical_indexed_addr y x z shift))
+
+(rule 18 (lower (has_type (and (ty_addr64 _) (mie4_enabled))
+      (iadd $I64 (ishl $I64 (sextend $I64 (iadd $I32 x (simm20_from_value z))) (lxa_shift_from_value shift)) y)))
+      (load_indexed_addr y x z shift))
+
+(rule 19 (lower (has_type (and (ty_addr64 _) (mie4_enabled))
+      (iadd $I64 y (ishl $I64 (sextend $I64 (iadd $I32 x (simm20_from_value z))) (lxa_shift_from_value shift)))))
+      (load_indexed_addr y x z shift))
 
 ;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs
index 392eaa397aa5..134329878902 100644
--- a/cranelift/codegen/src/isa/s390x/lower/isle.rs
+++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs
@@ -478,6 +478,13 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
         Some(imm)
     }
 
+    #[inline]
+    fn u16_from_value(&mut self, val: Value) -> Option<u16> {
+        let constant = self.u64_from_value(val)?;
+        let imm = u16::try_from(constant).ok()?;
+        Some(imm)
+    }
+
     #[inline]
     fn u8_from_value(&mut self, val: Value) -> Option<u8> {
         let constant = self.u64_from_value(val)?;
@@ -547,6 +554,21 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
         UImm16Shifted::maybe_from_u64(constant)
     }
 
+    #[inline]
+    fn simm20_from_value(&mut self, val: Value) -> Option<SImm20> {
+        // NOTE(review): `u64_from_value` presumably zero-extends narrow constants, in
+        // which case negative `i32` displacements never match here -- confirm.
+        let constant = self.u64_from_value(val)? as i64;
+        SImm20::maybe_from_i64(constant)
+    }
+
+    #[inline]
+    fn lxa_shift_from_value(&mut self, val: Value) -> Option<u8> {
+        // Only shift amounts 0..=4 map to an LXA/LLXA opcode; reject anything larger.
+        let shift = self.u8_from_value(val)?;
+        (shift <= 4).then_some(shift)
+    }
+
     #[inline]
     fn uimm32shifted_from_value(&mut self, val: Value) -> Option<UImm32Shifted> {
         let constant = self.u64_from_value(val)?;
diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif
index e2f9046560e4..499e9c20a79f 100644
--- a/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif
+++ b/cranelift/filetests/filetests/isa/s390x/arithmetic-arch15.clif
@@ -327,3 +327,49 @@ block0(v0: i128):
 ;   vst %v4, 0(%r2)
 ;   br %r14
 
+function %i64_i32_offset_mul_unsigned(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 4
+    v3 = iconst.i32 8000
+    v4 = iadd v1, v3
+    v5 = uextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v0, v6
+    return v7
+}
+
+; VCode:
+; block0:
+;   llxaq %r2, 8000(%r3,%r2)
+;   br %r14
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0xe3, 0x23
+;   swr %f4, %f0
+;   .byte 0x01, 0x69
+;   br %r14
+
+function %i64_i32_offset_mul_signed(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 4
+    v3 = iconst.i32 8000
+    v4 = iadd v1, v3
+    v5 = sextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v0, v6
+    return v7
+}
+
+; VCode:
+; block0:
+;   lxaq %r2, 8000(%r3,%r2)
+;   br %r14
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0xe3, 0x23
+;   swr %f4, %f0
+;   .byte 0x01, 0x68
+;   br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/load-arch15.clif b/cranelift/filetests/filetests/isa/s390x/load-arch15.clif
new file mode 100644
index 000000000000..54757e867774
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/s390x/load-arch15.clif
@@ -0,0 +1,80 @@
+test compile precise-output
+set enable_multi_ret_implicit_sret
+target s390x arch15
+
+function %uload8_i64_i32_offset_mul_unsigned(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 4
+    v3 = iconst.i32 8000
+    v4 = iadd v1, v3
+    v5 = uextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v0, v6
+    v8 = uload8.i64 v7
+    return v8
+}
+
+; VCode:
+; block0:
+;   llxaq %r3, 8000(%r3,%r2)
+;   llgc %r2, 0(%r3)
+;   br %r14
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0xe3, 0x33
+;   swr %f4, %f0
+;   .byte 0x01, 0x69
+;   llgc %r2, 0(%r3) ; trap: heap_oob
+;   br %r14
+
+function %uload8_i64_i64_offset_mul_signed(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 4
+    v3 = iconst.i32 8000
+    v4 = iadd v1, v3
+    v5 = sextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v0, v6
+    v8 = uload8.i64 v7
+    return v8
+}
+
+; VCode:
+; block0:
+;   lxaq %r3, 8000(%r3,%r2)
+;   llgc %r2, 0(%r3)
+;   br %r14
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   .byte 0xe3, 0x33
+;   swr %f4, %f0
+;   .byte 0x01, 0x68
+;   llgc %r2, 0(%r3) ; trap: heap_oob
+;   br %r14
+
+function %uload8_i64_i64_offset_shifted0(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 0
+    v3 = uextend.i64 v1
+    v4 = ishl v3, v2
+    v5 = iadd v0, v4
+    v6 = uload8.i64 v5+1000
+    return v6
+}
+
+; VCode:
+; block0:
+;   llgfr %r5, %r3
+;   sllg %r5, %r5, 0
+;   llgc %r2, 1000(%r5,%r2)
+;   br %r14
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   llgfr %r5, %r3
+;   sllg %r5, %r5, 0
+;   llgc %r2, 0x3e8(%r5, %r2) ; trap: heap_oob
+;   br %r14
+
diff --git a/cranelift/filetests/filetests/runtests/s390x-lxa.clif b/cranelift/filetests/filetests/runtests/s390x-lxa.clif
new file mode 100644
index 000000000000..be5cf126e71c
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/s390x-lxa.clif
@@ -0,0 +1,70 @@
+test interpret
+test run
+target pulley64
+target s390x arch15
+
+function %i64_i32_offset_mul_unsigned(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 4
+    v3 = iconst.i32 0x7fff
+    v4 = iadd v1, v3
+    v5 = uextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v0, v6
+    return v7
+}
+
+; run: %i64_i32_offset_mul_unsigned(0, 0) == 0x7fff0
+; run: %i64_i32_offset_mul_unsigned(0, -1) == 0x7ffe0
+; run: %i64_i32_offset_mul_unsigned(-1, -1) == 0x7ffdf
+; run: %i64_i32_offset_mul_unsigned(0, 0x7fff_ffff) == 0x8_0007ffe0
+; run: %i64_i32_offset_mul_unsigned(0x7fffffff_ffffffff, 0x7fff_ffff) == 0x80000008_0007ffdf
+; run: %i64_i32_offset_mul_unsigned(0x7fffffff_ffffffff, 0x8000_0000) == 0x80000008_0007ffef
+
+function %i64_i32_offset_mul_signed(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 4
+    v3 = iconst.i32 0x7fff
+    v4 = iadd v1, v3
+    v5 = sextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v0, v6
+    return v7
+}
+
+; run: %i64_i32_offset_mul_signed(0, 0) == 0x7fff0
+; run: %i64_i32_offset_mul_signed(0, -1) == 0x7ffe0
+; run: %i64_i32_offset_mul_signed(-1, -1) == 0x7ffdf
+; run: %i64_i32_offset_mul_signed(0, 0x7fff_ffff) == 0xfffffff8_0007ffe0
+; run: %i64_i32_offset_mul_signed(0x7fffffff_ffffffff, 0x7fff_ffff) == 0x7ffffff8_0007ffdf
+; run: %i64_i32_offset_mul_signed(0x7fffffff_ffffffff, 0x8000_0000) == 0x7ffffff8_0007ffef
+
+function %i64_i32_offset_mul_unsigned_commuted(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 4
+    v3 = iconst.i32 0x7fff
+    v4 = iadd v1, v3
+    v5 = uextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v6, v0
+    return v7
+}
+
+; run: %i64_i32_offset_mul_unsigned_commuted(0, 0) == 0x7fff0
+; run: %i64_i32_offset_mul_unsigned_commuted(-1, -1) == 0x7ffdf
+; run: %i64_i32_offset_mul_unsigned_commuted(0x7fffffff_ffffffff, 0x8000_0000) == 0x80000008_0007ffef
+
+function %i64_i32_offset_shift5_unsigned(i64, i32) -> i64 {
+block0(v0: i64, v1: i32):
+    v2 = iconst.i8 5
+    v3 = iconst.i32 0x7fff
+    v4 = iadd v1, v3
+    v5 = uextend.i64 v4
+    v6 = ishl v5, v2
+    v7 = iadd v0, v6
+    return v7
+}
+
+; run: %i64_i32_offset_shift5_unsigned(0, 0) == 0xfffe0
+; run: %i64_i32_offset_shift5_unsigned(0, -1) == 0xfffc0
+; run: %i64_i32_offset_shift5_unsigned(-1, -1) == 0xfffbf