diff --git a/Cargo.lock b/Cargo.lock index b0824fd0d3d0..9865f3d25e22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3421,6 +3421,7 @@ dependencies = [ "addr2line", "anyhow", "async-trait", + "bitflags 2.4.1", "bumpalo", "cc", "cfg-if", diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 36330f432419..a6aaadace2f1 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -3120,10 +3120,10 @@ (decl amode (Type Value i32) AMode) (rule 0 (amode ty val offset) (amode_no_more_iconst ty val offset)) -(rule 1 (amode ty (iadd x (iconst (simm32 y))) offset) +(rule 1 (amode ty (iadd x (i32_from_iconst y)) offset) (if-let new_offset (s32_add_fallible y offset)) (amode_no_more_iconst ty x new_offset)) -(rule 2 (amode ty (iadd (iconst (simm32 x)) y) offset) +(rule 2 (amode ty (iadd (i32_from_iconst x) y) offset) (if-let new_offset (s32_add_fallible x offset)) (amode_no_more_iconst ty y new_offset)) diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 05f3f3a45ffe..d13949c1ddbf 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -855,7 +855,7 @@ fn compute_clobber_size(clobbers: &[Writable]) -> u32 { align_to(clobbered_size, 16) } -const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() +pub(crate) const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() .with(px_reg(1)) .with(px_reg(5)) .with(px_reg(6)) diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 5dc9c998929f..a17872e8f1e5 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -2393,10 +2393,10 @@ ;; ;; We can't recurse into `amode` again since that could cause stack overflows. 
;; See: https://github.com/bytecodealliance/wasmtime/pull/6968 -(rule 1 (amode (iadd addr (iconst (simm32 y))) offset) +(rule 1 (amode (iadd addr (i32_from_iconst y)) offset) (if-let new_offset (s32_add_fallible y offset)) (amode_inner addr new_offset)) -(rule 2 (amode (iadd (iconst (simm32 x)) addr) offset) +(rule 2 (amode (iadd (i32_from_iconst x) addr) offset) (if-let new_offset (s32_add_fallible x offset)) (amode_inner addr new_offset)) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index db3b3c198357..044368a5bc16 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -47,14 +47,14 @@ pub enum EmitVState { #[derive(Default, Clone, Debug)] pub struct EmitState { /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. - stack_map: Option, + pub(crate) stack_map: Option, /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. - ctrl_plane: ControlPlane, + pub(crate) ctrl_plane: ControlPlane, /// Vector State /// Controls the current state of the vector unit at the emission point. 
- vstate: EmitVState, - frame_layout: FrameLayout, + pub(crate) vstate: EmitVState, + pub(crate) frame_layout: FrameLayout, } impl EmitState { diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs index 5a00e4f4dcf2..ffa5f118f24a 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs @@ -1,6 +1,6 @@ #[allow(unused)] use crate::ir::LibCall; -use crate::isa::riscv64::inst::*; +use crate::isa::riscv64::{abi::DEFAULT_CLOBBERS, inst::*}; use std::borrow::Cow; fn fa7() -> Reg { @@ -2135,7 +2135,7 @@ fn riscv64_worst_case_instruction_size() { let (flags, isa_flags) = make_test_flags(); let emit_info = EmitInfo::new(flags, isa_flags); - //there are all candidates potential generate a lot of bytes. + // These are all candidate instructions with potential to generate a lot of bytes. let mut candidates: Vec = vec![]; candidates.push(Inst::Popcnt { @@ -2198,10 +2198,40 @@ fn riscv64_worst_case_instruction_size() { }), ); + candidates.push(Inst::ReturnCallInd { + callee: a0(), + info: Box::new(ReturnCallInfo { + opcode: Opcode::ReturnCallIndirect, + new_stack_arg_size: 64, + uses: DEFAULT_CLOBBERS + .into_iter() + .map(|reg| CallArgPair { + vreg: reg.into(), + preg: reg.into(), + }) + .collect(), + }), + }); + let mut max: (u32, MInst) = (0, Inst::Nop0); for i in candidates { let mut buffer = MachBuffer::new(); - i.emit(&mut buffer, &emit_info, &mut Default::default()); + let mut emit_state = EmitState { + // This frame layout is important to ensure that the ReturnCallIndirect + // instruction in this test, becomes as large as practically possible. 
+ frame_layout: FrameLayout { + tail_args_size: 64, + setup_area_size: 8192, + clobbered_callee_saves: DEFAULT_CLOBBERS + .into_iter() + .filter(|r| r.class() != RegClass::Vector) + .map(|r| Writable::from_reg(r.into())) + .collect(), + ..Default::default() + }, + ..Default::default() + }; + i.emit(&mut buffer, &emit_info, &mut emit_state); let buffer = buffer.finish(&Default::default(), &mut Default::default()); let length = buffer.data().len() as u32; if length > max.0 { diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index a846c6024974..b03f8e98e411 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -844,8 +844,8 @@ impl MachInst for Inst { } fn worst_case_size() -> CodeOffset { - // calculate by test function riscv64_worst_case_instruction_size() - 124 + // Our worst case size is determined by the riscv64_worst_case_instruction_size test + 168 } fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index d287e0c7b877..04de11d1f16c 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -2465,20 +2465,25 @@ ;; These rules should probably be handled in `gen_bitcast`, but it's convenient to have that return ;; a single register, instead of a `ValueRegs` -(rule 2 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_vec_fits_in_register _))))) +(rule 3 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_vec_fits_in_register _))))) (value_regs (gen_extractlane $I64X2 v 0) (gen_extractlane $I64X2 v 1))) ;; Move the high half into a vector register, and then use vslide1up to move it up and ;; insert the lower half in one instruction. 
-(rule 1 (lower (has_type (ty_vec_fits_in_register _) (bitcast _ v @ (value_type $I128)))) +(rule 2 (lower (has_type (ty_vec_fits_in_register _) (bitcast _ v @ (value_type $I128)))) (let ((lo XReg (value_regs_get v 0)) (hi XReg (value_regs_get v 1)) (vstate VState (vstate_from_type $I64X2)) (vec VReg (rv_vmv_sx hi vstate))) (rv_vslide1up_vx vec vec lo (unmasked) vstate))) +;; `gen_bitcast` below only works with single register values, so handle I128 +;; specially here. +(rule 1 (lower (has_type $I128 (bitcast _ v @ (value_type $I128)))) + v) + (rule 0 (lower (has_type out_ty (bitcast _ v @ (value_type in_ty)))) (gen_bitcast v in_ty out_ty)) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index a684efa900c8..00ab2c16ca98 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1116,22 +1116,22 @@ (rule 0 (to_amode_add flags x y offset) (amode_imm_reg_reg_shift flags x y offset)) -(rule 1 (to_amode_add flags x (iconst (simm32 c)) offset) +(rule 1 (to_amode_add flags x (i32_from_iconst c) offset) (if-let sum (s32_add_fallible offset c)) (amode_imm_reg flags x sum)) -(rule 2 (to_amode_add flags (iconst (simm32 c)) x offset) +(rule 2 (to_amode_add flags (i32_from_iconst c) x offset) (if-let sum (s32_add_fallible offset c)) (amode_imm_reg flags x sum)) -(rule 3 (to_amode_add flags (iadd x (iconst (simm32 c))) y offset) +(rule 3 (to_amode_add flags (iadd x (i32_from_iconst c)) y offset) (if-let sum (s32_add_fallible offset c)) (amode_imm_reg_reg_shift flags x y sum)) -(rule 4 (to_amode_add flags (iadd (iconst (simm32 c)) x) y offset) +(rule 4 (to_amode_add flags (iadd (i32_from_iconst c) x) y offset) (if-let sum (s32_add_fallible offset c)) (amode_imm_reg_reg_shift flags x y sum)) -(rule 5 (to_amode_add flags x (iadd y (iconst (simm32 c))) offset) +(rule 5 (to_amode_add flags x (iadd y (i32_from_iconst c)) offset) (if-let sum (s32_add_fallible offset c)) (amode_imm_reg_reg_shift 
flags x y sum)) -(rule 6 (to_amode_add flags x (iadd (iconst (simm32 c)) y) offset) +(rule 6 (to_amode_add flags x (iadd (i32_from_iconst c) y) offset) (if-let sum (s32_add_fallible offset c)) (amode_imm_reg_reg_shift flags x y sum)) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 92d8b74780b0..773a44d36499 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -979,9 +979,9 @@ (x64_imul ty y x)) ;; lift out constants to use 3-operand form -(rule -3 (lower (has_type (ty_int_ref_16_to_64 ty) (imul x (iconst (simm32 y))))) +(rule -3 (lower (has_type (ty_int_ref_16_to_64 ty) (imul x (i32_from_iconst y)))) (x64_imul_imm ty x y)) -(rule -2 (lower (has_type (ty_int_ref_16_to_64 ty) (imul (iconst (simm32 x)) y))) +(rule -2 (lower (has_type (ty_int_ref_16_to_64 ty) (imul (i32_from_iconst x) y))) (x64_imul_imm ty y x)) ;; `i128`. @@ -2970,9 +2970,10 @@ (x64_movrm $I32 (to_amode flags address offset) value))) ;; IMM stores -(rule 2 (lower (store flags (has_type (fits_in_64 ty) (iconst (simm32 value))) address offset)) +(rule 4 (lower (store flags value @ (value_type (fits_in_64 ty)) address offset)) + (if-let (i32_from_iconst imm) value) (side_effect - (x64_movimm_m ty (to_amode flags address offset) value))) + (x64_movimm_m ty (to_amode flags address offset) imm))) ;; F32 stores of values in XMM registers. 
(rule 1 (lower (store flags @@ -3308,6 +3309,12 @@ (rule 2 (lower_branch (brif (maybe_uextend (fcmp cc a b)) _ _) (two_targets then else)) (emit_side_effect (jmp_cond_fcmp (emit_fcmp cc a b) then else))) +(rule 2 (lower_branch (brif (maybe_uextend (vany_true a)) _ _) (two_targets then else)) + (emit_side_effect (jmp_cond_icmp (emit_vany_true a) then else))) + +(rule 2 (lower_branch (brif (maybe_uextend (vall_true a)) _ _) (two_targets then else)) + (emit_side_effect (jmp_cond_icmp (emit_vall_true a) then else))) + (rule 1 (lower_branch (brif val @ (value_type $I128) _ _) (two_targets then else)) (emit_side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) then else))) @@ -4263,10 +4270,9 @@ ;; TODO use Inst::gen_constant() instead. (x64_xmm_load_const ty (const_to_vconst const))) -;; Special case for a zero-vector: don't load, xor instead. -(rule 1 (lower (has_type ty (vconst (u128_from_constant 0)))) - (let ((dst Xmm (xmm_uninit_value))) - (x64_pxor dst dst))) +;; Special cases for known constant patterns to skip a 16-byte load. +(rule 1 (lower (has_type ty (vconst (u128_from_constant 0)))) (xmm_zero ty)) +(rule 1 (lower (has_type ty (vconst (u128_from_constant -1)))) (vector_all_ones)) ;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -4630,30 +4636,38 @@ ;; 0xffff then every byte was equal to zero, so test if the comparison is ;; not-equal or NZ. 
(rule (lower (vany_true val)) + (lower_icmp_bool (emit_vany_true val))) + +(decl emit_vany_true (Value) IcmpCondResult) +(rule (emit_vany_true val) (let ( (any_byte_zero Xmm (x64_pcmpeqb val (xmm_zero $I8X16))) (mask Gpr (x64_pmovmskb (OperandSize.Size32) any_byte_zero)) ) - (with_flags (x64_cmp_imm (OperandSize.Size32) mask 0xffff) - (x64_setcc (CC.NZ))))) + (icmp_cond_result (x64_cmp_imm (OperandSize.Size32) mask 0xffff) + (CC.NZ)))) ;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 1 (lower (vall_true val @ (value_type ty))) +(rule (lower (vall_true val)) + (lower_icmp_bool (emit_vall_true val))) + +(decl emit_vall_true (Value) IcmpCondResult) +(rule 1 (emit_vall_true val @ (value_type ty)) (if-let $true (use_sse41)) (let ((src Xmm val) (zeros Xmm (xmm_zero ty)) (cmp Xmm (x64_pcmpeq (vec_int_type ty) src zeros))) - (with_flags (x64_ptest cmp cmp) (x64_setcc (CC.Z))))) + (icmp_cond_result (x64_ptest cmp cmp) (CC.Z)))) ;; Perform an appropriately-sized lane-wise comparison with zero. If the ;; result is all 0s then all of them are true because nothing was equal to ;; zero. -(rule (lower (vall_true val @ (value_type ty))) +(rule (emit_vall_true val @ (value_type ty)) (let ((lanes_with_zero Xmm (x64_pcmpeq (vec_int_type ty) val (xmm_zero ty))) (mask Gpr (x64_pmovmskb (OperandSize.Size32) lanes_with_zero))) - (with_flags (x64_test (OperandSize.Size32) mask mask) - (x64_setcc (CC.Z))))) + (icmp_cond_result (x64_test (OperandSize.Size32) mask mask) + (CC.Z)))) ;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 547eb9048abd..137cf239a73e 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -235,6 +235,10 @@ macro_rules! 
isle_lower_prelude_methods { Some((constant << shift_amt) >> shift_amt) } + fn i32_from_iconst(&mut self, val: Value) -> Option { + self.i64_from_iconst(val)?.try_into().ok() + } + fn zero_value(&mut self, value: Value) -> Option { let insn = self.def_inst(value); if insn.is_some() { @@ -568,11 +572,6 @@ macro_rules! isle_lower_prelude_methods { Some(value) } - #[inline] - fn simm32(&mut self, x: Imm64) -> Option { - i64::from(x).try_into().ok() - } - #[inline] fn uimm8(&mut self, x: Imm64) -> Option { let x64: i64 = x.into(); diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle index 7d2037197c06..31a3c6c613f9 100644 --- a/cranelift/codegen/src/prelude_lower.isle +++ b/cranelift/codegen/src/prelude_lower.isle @@ -284,6 +284,11 @@ (extractor (u64_from_iconst x) (def_inst (iconst (u64_from_imm64 x)))) +;; Extract a constant `i32` from a value defined by an `iconst`. +;; The value is sign extended to 32 bits. +(decl i32_from_iconst (i32) Value) +(extern extractor i32_from_iconst i32_from_iconst) + ;; Extract a constant `i64` from a value defined by an `iconst`. ;; The value is sign extended to 64 bits. (decl i64_from_iconst (i64) Value) @@ -302,14 +307,6 @@ (decl maybe_uextend (Value) Value) (extern extractor maybe_uextend maybe_uextend) -;; Get a signed 32-bit immediate in an u32 from an Imm64, if possible. -;; Note that this checks that the raw i64 value from the Imm64 fits in i32, -;; so `-1_u32` will not actually match -- it's treated as `0xFFFF_FFFF_i64`, -;; which doesn't fit in an i32 and thus doesn't match the extractor. -;; An Imm64 of `-1_i64` *will* match, however. -(decl simm32 (i32) Imm64) -(extern extractor simm32 simm32) - ;; Get an unsigned 8-bit immediate in a u8 from an Imm64, if possible. 
(decl uimm8 (u8) Imm64) (extern extractor uimm8 uimm8) diff --git a/cranelift/filetests/filetests/isa/riscv64/issue8847-1.clif b/cranelift/filetests/filetests/isa/riscv64/issue8847-1.clif new file mode 100644 index 000000000000..64dae40ca8ef --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/issue8847-1.clif @@ -0,0 +1,57 @@ +;; Compile test case + +test compile +target riscv64 + +function u1:0() tail { + ss0 = explicit_slot 50, align = 512 + ss1 = explicit_slot 47, align = 4 + ss2 = explicit_slot 34, align = 32 + ss3 = explicit_slot 103, align = 1024 + ss4 = explicit_slot 110, align = 512 + ss5 = explicit_slot 126, align = 512 + sig0 = (i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i8 uext, i16 uext, i16, i64 sext, i64 sext, i128 uext, i8 sext, f32) tail + +block0: + v0 = iconst.i64 0xef31_de2a_2352_79ff + v3 = iconst.i16 0xffef + v164 = iconst.i64 0 + v7 = uextend.i128 v164 ; v164 = 0 + v14 = iconst.i8 203 + v15 = f32const -0x1.979796p24 + v112 = iconst.i8 0 + v134 = iconst.i8 0 + v147 = iconst.i8 0 + v154 = iconst.i8 0 + v156 = iconst.i32 0 + v157 = iconst.i32 0 + v163 = iconst.i64 0 + brif v112, block40, block39 ; v112 = 0 + +block40: + trap user0 + +block39: + brif.i8 v134, block58, block57 ; v134 = 0 + +block58: + trap user0 + +block57: + brif.i8 v147, block68, block67 ; v147 = 0 + +block68: + trap user0 + +block67: + brif.i8 v154, block73, block72 ; v154 = 0 + +block73: + br_table v156, block1, [block1, block1] ; v156 = 0 + +block72: + br_table v157, block1, [block1, block1] ; v157 = 0 + +block1 cold: + return_call_indirect.i64 sig0, v163(v0, v0, v0, v0, v0, v0, v0, v14, v3, v3, v0, v0, v7, v14, v15) ; v163 = 0, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v14 = 203, v3 = 0xffef, v3 = 0xffef, v0 = 0xef31_de2a_2352_79ff, v0 = 0xef31_de2a_2352_79ff, v14 = 203, v15 = 
-0x1.979796p24 +} diff --git a/cranelift/filetests/filetests/isa/riscv64/issue8847.clif b/cranelift/filetests/filetests/isa/riscv64/issue8847.clif new file mode 100644 index 000000000000..c7c79d5b8809 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/issue8847.clif @@ -0,0 +1,480 @@ +;; Compile test case + +test compile +set bb_padding_log2_minus_one=4 +set enable_alias_analysis=false +set enable_llvm_abi_extensions=true +set machine_code_cfg_info=true +set enable_jump_tables=false +set enable_heap_access_spectre_mitigation=false +target riscv64 has_zcd has_zbkb has_zbc has_zbs has_zicond has_zvl32b has_zvl64b has_zvl128b has_zvl1024b has_zvl2048b has_zvl4096b has_zvl8192b has_zvl16384b has_zvl32768b + +function u1:0() tail { + ss0 = explicit_slot 50, align = 512 + ss1 = explicit_slot 47, align = 4 + ss2 = explicit_slot 34, align = 32 + ss3 = explicit_slot 103, align = 1024 + ss4 = explicit_slot 110, align = 512 + ss5 = explicit_slot 126, align = 512 + sig0 = (f32, f64, f64, f32, i8 uext, i128, i8 uext, i32, i16 sext, i64, i64 sext, i128, i8 sext, i8, i64, i64 sext) -> i16 sext, i64 sext, f64, i32 sext, f64, i8 sext, i64 sext, f32 + sig1 = () system_v + sig2 = (i128, i16 sext, i128 sext, i32 sext, i16, i64 uext, f32, i8 sext, f32, i8, i64, i64, i64, i64 uext, f64) -> f64, i8 uext, f32, i128, i64 uext, i8, i16 sext, i64 sext tail + sig3 = () -> i8 sext, f32, i128, i32, f32, i128 uext, i8, i8 uext, f64, i8 sext, f32 system_v + sig4 = (i8, i16, i64 sext, i64 sext, i128 uext, i8, i32, f64, i32, f32, i128 uext, i8, i8 uext, f64, f64) -> i8 uext system_v + sig5 = (i8 sext, i64 uext, i16 sext, i64 sext, i128 uext, i128 sext, f32, i16 uext, i64 sext, i32 sext, i64, i64 uext, f64, f64, i16 sext) -> f32 tail + sig6 = (i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i64 sext, i8 uext, i16 uext, i16, i64 sext, i64 sext, i128 uext, i8 sext, f32) tail + sig7 = (f32) -> f32 system_v + sig8 = (f64) -> f64 system_v + sig9 = (f32) -> f32 system_v + sig10 = 
(f64) -> f64 system_v + sig11 = (f32) -> f32 system_v + sig12 = (f64) -> f64 system_v + sig13 = (f32) -> f32 system_v + sig14 = (f64) -> f64 system_v + sig15 = (f32, f32, f32) -> f32 system_v + sig16 = (f64, f64, f64) -> f64 system_v + fn0 = colocated u2:0 sig0 + fn1 = colocated u2:1 sig1 + fn2 = colocated u2:2 sig2 + fn3 = colocated u2:3 sig3 + fn4 = colocated u2:4 sig4 + fn5 = colocated u2:5 sig5 + fn6 = colocated u2:6 sig6 + fn7 = %CeilF32 sig7 + fn8 = %CeilF64 sig8 + fn9 = %FloorF32 sig9 + fn10 = %FloorF64 sig10 + fn11 = colocated %TruncF32 sig11 + fn12 = %TruncF64 sig12 + fn13 = colocated %NearestF32 sig13 + fn14 = %NearestF64 sig14 + fn15 = %FmaF32 sig15 + fn16 = %FmaF64 sig16 + +block0: + v0 = iconst.i64 0xef31_de2a_2352_79ff + v158 -> v0 + v1 = iconst.i64 0x2231_ffd1_ff29_ff26 + v2 = f64const 0x1.8ff2320672823p-225 + v3 = iconst.i16 0xffef + v160 -> v3 + v4 = iconst.i64 0xddde_2a23_52f9_ffff + v5 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v6 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v7 = iconcat v6, v5 ; v6 = 0xc8c8_c8c8_c8c8_c8c8, v5 = 0xc8c8_c8c8_c8c8_c8c8 + v161 -> v7 + v8 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v9 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v10 = iconcat v9, v8 ; v9 = 0xc8c8_c8c8_c8c8_c8c8, v8 = 0xc8c8_c8c8_c8c8_c8c8 + v11 = iconst.i64 0xcbcb_cbcb_cbc8_c8c8 + v12 = iconst.i64 0xc8c8_c8c8_c8c8_c8c8 + v13 = iconcat v12, v11 ; v12 = 0xc8c8_c8c8_c8c8_c8c8, v11 = 0xcbcb_cbcb_cbc8_c8c8 + v14 = iconst.i8 203 + v159 -> v14 + v15 = f32const -0x1.979796p24 + v162 -> v15 + v16 = iconst.i64 0x0031_2222_2a2f + v17 = iconst.i64 0xcbcb_2adc_9e98_d7d4 + v18 = iconcat v17, v16 ; v17 = 0xcbcb_2adc_9e98_d7d4, v16 = 0x0031_2222_2a2f + v19 = iconst.i8 0 + v20 = iconst.i16 0 + v21 = iconst.i32 0 + v22 = iconst.i64 0 + v23 = uextend.i128 v22 ; v22 = 0 + v24 = stack_addr.i64 ss2 + store notrap table v23, v24 + v25 = stack_addr.i64 ss2+16 + store notrap table v23, v25 + v26 = stack_addr.i64 ss2+32 + store notrap table v20, v26 ; v20 = 0 + v27 = stack_addr.i64 ss1 + store notrap 
table v23, v27 + v28 = stack_addr.i64 ss1+16 + store notrap table v23, v28 + v29 = stack_addr.i64 ss1+32 + store notrap table v22, v29 ; v22 = 0 + v30 = stack_addr.i64 ss1+40 + store notrap table v21, v30 ; v21 = 0 + v31 = stack_addr.i64 ss1+44 + store notrap table v20, v31 ; v20 = 0 + v32 = stack_addr.i64 ss1+46 + store notrap table v19, v32 ; v19 = 0 + v33 = stack_addr.i64 ss0 + store notrap table v23, v33 + v34 = stack_addr.i64 ss0+16 + store notrap table v23, v34 + v35 = stack_addr.i64 ss0+32 + store notrap table v23, v35 + v36 = stack_addr.i64 ss0+48 + store notrap table v20, v36 ; v20 = 0 + v37 = stack_addr.i64 ss3 + store notrap vmctx v23, v37 + v38 = stack_addr.i64 ss3+16 + store notrap vmctx v23, v38 + v39 = stack_addr.i64 ss3+32 + store notrap vmctx v23, v39 + v40 = stack_addr.i64 ss3+48 + store notrap vmctx v23, v40 + v41 = stack_addr.i64 ss3+64 + store notrap vmctx v23, v41 + v42 = stack_addr.i64 ss3+80 + store notrap vmctx v23, v42 + v43 = stack_addr.i64 ss3+96 + store notrap vmctx v21, v43 ; v21 = 0 + v44 = stack_addr.i64 ss3+100 + store notrap vmctx v20, v44 ; v20 = 0 + v45 = stack_addr.i64 ss3+102 + store notrap vmctx v19, v45 ; v19 = 0 + v46 = stack_addr.i64 ss4 + store notrap heap v23, v46 + v47 = stack_addr.i64 ss4+16 + store notrap heap v23, v47 + v48 = stack_addr.i64 ss4+32 + store notrap heap v23, v48 + v49 = stack_addr.i64 ss4+48 + store notrap heap v23, v49 + v50 = stack_addr.i64 ss4+64 + store notrap heap v23, v50 + v51 = stack_addr.i64 ss4+80 + store notrap heap v23, v51 + v52 = stack_addr.i64 ss4+96 + store notrap heap v22, v52 ; v22 = 0 + v53 = stack_addr.i64 ss4+104 + store notrap heap v21, v53 ; v21 = 0 + v54 = stack_addr.i64 ss4+108 + store notrap heap v20, v54 ; v20 = 0 + v55 = stack_addr.i64 ss5 + store notrap vmctx v23, v55 + v56 = stack_addr.i64 ss5+16 + store notrap vmctx v23, v56 + v57 = stack_addr.i64 ss5+32 + store notrap vmctx v23, v57 + v58 = stack_addr.i64 ss5+48 + store notrap vmctx v23, v58 + v59 = stack_addr.i64 ss5+64 + 
store notrap vmctx v23, v59 + v60 = stack_addr.i64 ss5+80 + store notrap vmctx v23, v60 + v61 = stack_addr.i64 ss5+96 + store notrap vmctx v23, v61 + v62 = stack_addr.i64 ss5+112 + store notrap vmctx v22, v62 ; v22 = 0 + v63 = stack_addr.i64 ss5+120 + store notrap vmctx v21, v63 ; v21 = 0 + v64 = stack_addr.i64 ss5+124 + store notrap vmctx v20, v64 ; v20 = 0 + v65 = icmp_imm uge v3, 0x5123 ; v3 = 0xffef + brif v65, block3, block2 + +block3: + v66 = icmp_imm.i16 uge v3, 0xd5d7 ; v3 = 0xffef + brif v66, block5, block4 + +block5: + v67 = icmp_imm.i16 uge v3, 0xf6ff ; v3 = 0xffef + brif v67, block7, block6 + +block7: + v68 = icmp_imm.i16 uge v3, 0xff22 ; v3 = 0xffef + brif v68, block9, block8 + +block9: + v69 = icmp_imm.i16 eq v3, 0xffdd ; v3 = 0xffef + brif v69, block1, block10 + +block10: + v70 = icmp_imm.i16 uge v3, 0xff79 ; v3 = 0xffef + brif v70, block12, block11 + +block12: + v71 = iadd_imm.i16 v3, 0xffff_ffff_ffff_0087 ; v3 = 0xffef + v72 = uextend.i32 v71 + br_table v72, block1, [block1, block1, block1, block1, block1, block1] + +block11: + v73 = icmp_imm.i16 uge v3, 0xff22 ; v3 = 0xffef + brif v73, block13, block1 + +block13: + v74 = iadd_imm.i16 v3, 0xffff_ffff_ffff_00de ; v3 = 0xffef + v75 = uextend.i32 v74 + br_table v75, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block8: + v76 = icmp_imm.i16 eq v3, 0xf951 ; v3 = 0xffef + brif v76, block1, block14 + +block14: + v77 = icmp_imm.i16 uge v3, 0xf6ff ; v3 = 0xffef + brif v77, block15, block1 + +block15: + v78 = iadd_imm.i16 v3, 0xffff_ffff_ffff_0901 ; v3 = 0xffef + v79 = uextend.i32 v78 + br_table v79, block1, [block1, block1, block1, block1, block1, block1, block1] + +block6: + v80 = icmp_imm.i16 uge v3, 0xef2a ; v3 = 0xffef + brif v80, block17, block16 + +block17: + v81 = icmp_imm.i16 eq v3, 0xf426 ; v3 = 0xffef + brif v81, block1, block18 + +block18: + v82 = icmp_imm.i16 uge 
v3, 0xefff ; v3 = 0xffef + brif v82, block20, block19 + +block20: + v83 = iadd_imm.i16 v3, 0xffff_ffff_ffff_1001 ; v3 = 0xffef + v84 = uextend.i32 v83 + br_table v84, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block19: + v85 = icmp_imm.i16 uge v3, 0xef2a ; v3 = 0xffef + brif v85, block21, block1 + +block21: + v86 = iadd_imm.i16 v3, 0xffff_ffff_ffff_10d6 ; v3 = 0xffef + v87 = uextend.i32 v86 + br_table v87, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block16: + v88 = icmp_imm.i16 eq v3, 0xdc2a ; v3 = 0xffef + brif v88, block1, block22 + +block22: + v89 = icmp_imm.i16 uge v3, 0xd5d7 ; v3 = 0xffef + brif v89, block23, block1 + +block23: + v90 = iadd_imm.i16 v3, 0xffff_ffff_ffff_2a29 ; v3 = 0xffef + v91 = uextend.i32 v90 + br_table v91, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block4: + v92 = icmp_imm.i16 uge v3, 0x7363 ; v3 = 0xffef + brif v92, block25, block24 + +block25: + v93 = icmp_imm.i16 uge v3, 0x9f22 ; v3 = 0xffef + brif v93, block27, block26 + +block27: + v94 = icmp_imm.i16 uge v3, 0xbf41 ; v3 = 0xffef + brif v94, block29, block28 + +block29: + v95 = iadd_imm.i16 v3, 0xffff_ffff_ffff_40bf ; v3 = 0xffef + v96 = uextend.i32 v95 + br_table v96, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block28: + v97 = icmp_imm.i16 eq v3, 0xae73 ; v3 = 0xffef + brif v97, block1, block30 + +block30: + v98 = icmp_imm.i16 eq v3, 0x9f22 ; v3 = 0xffef + brif v98, block1, block1 + +block26: + v99 = icmp_imm.i16 uge v3, 0x9301 ; v3 = 0xffef + brif v99, block32, block31 + 
+block32: + v100 = iadd_imm.i16 v3, 0xffff_ffff_ffff_6cff ; v3 = 0xffef + v101 = uextend.i32 v100 + br_table v101, block1, [block1, block1, block1, block1, block1, block1, block1] + +block31: + v102 = icmp_imm.i16 uge v3, 0x7363 ; v3 = 0xffef + brif v102, block33, block1 + +block33: + v103 = iadd_imm.i16 v3, 0xffff_ffff_ffff_8c9d ; v3 = 0xffef + v104 = uextend.i32 v103 + br_table v104, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block24: + v105 = icmp_imm.i16 uge v3, 0x56cc ; v3 = 0xffef + brif v105, block35, block34 + +block35: + v106 = icmp_imm.i16 eq v3, 0x6720 ; v3 = 0xffef + brif v106, block1, block36 + +block36: + v107 = icmp_imm.i16 eq v3, 0x56cc ; v3 = 0xffef + brif v107, block1, block1 + +block34: + v108 = icmp_imm.i16 eq v3, 0x5230 ; v3 = 0xffef + brif v108, block1, block37 + +block37: + v109 = icmp_imm.i16 uge v3, 0x5123 ; v3 = 0xffef + brif v109, block38, block1 + +block38: + v110 = iadd_imm.i16 v3, 0xffff_ffff_ffff_aedd ; v3 = 0xffef + v111 = uextend.i32 v110 + br_table v111, block1, [block1, block1, block1, block1, block1, block1, block1] + +block2: + v112 = icmp_imm.i16 uge v3, 0x2a20 ; v3 = 0xffef + brif v112, block40, block39 + +block40: + v113 = icmp_imm.i16 uge v3, 0x2f22 ; v3 = 0xffef + brif v113, block42, block41 + +block42: + v114 = icmp_imm.i16 uge v3, 0x3320 ; v3 = 0xffef + brif v114, block44, block43 + +block44: + v115 = icmp_imm.i16 uge v3, 0x504d ; v3 = 0xffef + brif v115, block46, block45 + +block46: + v116 = iadd_imm.i16 v3, 0xffff_ffff_ffff_afb3 ; v3 = 0xffef + v117 = uextend.i32 v116 + br_table v117, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block45: + v118 = icmp_imm.i16 eq v3, 0x4118 ; v3 = 0xffef + brif v118, block1, block47 + +block47: + v119 = icmp_imm.i16 eq v3, 0x3320 ; v3 = 0xffef + brif 
v119, block1, block1 + +block43: + v120 = icmp_imm.i16 uge v3, 0x2f2a ; v3 = 0xffef + brif v120, block49, block48 + +block49: + v121 = iadd_imm.i16 v3, 0xffff_ffff_ffff_d0d6 ; v3 = 0xffef + v122 = uextend.i32 v121 + br_table v122, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block48: + v123 = icmp_imm.i16 eq v3, 0x2f22 ; v3 = 0xffef + brif v123, block1, block1 + +block41: + v124 = icmp_imm.i16 uge v3, 0x2a67 ; v3 = 0xffef + brif v124, block51, block50 + +block51: + v125 = icmp_imm.i16 uge v3, 0x2ade ; v3 = 0xffef + brif v125, block53, block52 + +block53: + v126 = iadd_imm.i16 v3, 0xffff_ffff_ffff_d522 ; v3 = 0xffef + v127 = uextend.i32 v126 + br_table v127, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block52: + v128 = icmp_imm.i16 eq v3, 0x2ab2 ; v3 = 0xffef + brif v128, block1, block54 + +block54: + v129 = icmp_imm.i16 uge v3, 0x2a67 ; v3 = 0xffef + brif v129, block55, block1 + +block55: + v130 = iadd_imm.i16 v3, 0xffff_ffff_ffff_d599 ; v3 = 0xffef + v131 = uextend.i32 v130 + br_table v131, block1, [block1, block1, block1, block1, block1, block1] + +block50: + v132 = icmp_imm.i16 eq v3, 0x2a38 ; v3 = 0xffef + brif v132, block1, block56 + +block56: + v133 = icmp_imm.i16 eq v3, 0x2a20 ; v3 = 0xffef + brif v133, block1, block1 + +block39: + v134 = icmp_imm.i16 uge v3, 512 ; v3 = 0xffef + brif v134, block58, block57 + +block58: + v135 = icmp_imm.i16 uge v3, 8241 ; v3 = 0xffef + brif v135, block60, block59 + +block60: + v136 = icmp_imm.i16 eq v3, 9983 ; v3 = 0xffef + brif v136, block1, block61 + +block61: + v137 = icmp_imm.i16 uge v3, 8738 ; v3 = 
0xffef + brif v137, block63, block62 + +block63: + v138 = iadd_imm.i16 v3, -8738 ; v3 = 0xffef + v139 = uextend.i32 v138 + br_table v139, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block62: + v140 = icmp_imm.i16 uge v3, 8241 ; v3 = 0xffef + brif v140, block64, block1 + +block64: + v141 = iadd_imm.i16 v3, -8241 ; v3 = 0xffef + v142 = uextend.i32 v141 + br_table v142, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block59: + v143 = icmp_imm.i16 eq v3, 6493 ; v3 = 0xffef + brif v143, block1, block65 + +block65: + v144 = icmp_imm.i16 uge v3, 512 ; v3 = 0xffef + brif v144, block66, block1 + +block66: + v145 = iadd_imm.i16 v3, -512 ; v3 = 0xffef + v146 = uextend.i32 v145 + br_table v146, block1, [block1, block1, block1] + +block57: + v147 = icmp_imm.i16 uge v3, 212 ; v3 = 0xffef + brif v147, block68, block67 + +block68: + v148 = icmp_imm.i16 uge v3, 341 ; v3 = 0xffef + brif v148, block70, block69 + +block70: + v149 = iadd_imm.i16 v3, -341 ; v3 = 0xffef + v150 = uextend.i32 v149 + br_table v150, block1, [block1, block1] + +block69: + v151 = icmp_imm.i16 uge v3, 212 ; v3 = 0xffef + brif v151, block71, block1 + +block71: + v152 = iadd_imm.i16 v3, -212 ; v3 = 0xffef + v153 = uextend.i32 v152 + br_table v153, block1, [block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1, block1] + +block67: + v154 = icmp_imm.i16 uge v3, 128 ; v3 = 0xffef + brif v154, block73, block72 + +block73: + v155 = iadd_imm.i16 v3, -128 ; v3 = 0xffef + v156 = uextend.i32 v155 + br_table v156, block1, [block1, block1] + +block72: + v157 = uextend.i32 v3 ; v3 = 0xffef + br_table v157, block1, 
[block1, block1] + +block1 cold: + v163 = func_addr.i64 fn6 + return_call_indirect sig6, v163(v158, v158, v158, v158, v158, v158, v158, v159, v160, v160, v158, v158, v161, v159, v162) ; v158 = 0xef31_de2a_2352_79ff, v158 = 0xef31_de2a_2352_79ff, v158 = 0xef31_de2a_2352_79ff, v158 = 0xef31_de2a_2352_79ff, v158 = 0xef31_de2a_2352_79ff, v158 = 0xef31_de2a_2352_79ff, v158 = 0xef31_de2a_2352_79ff, v159 = 203, v160 = 0xffef, v160 = 0xffef, v158 = 0xef31_de2a_2352_79ff, v158 = 0xef31_de2a_2352_79ff, v159 = 203, v162 = -0x1.979796p24 +} diff --git a/cranelift/filetests/filetests/isa/riscv64/return-call.clif b/cranelift/filetests/filetests/isa/riscv64/return-call.clif index e119025b92a4..5a575cb1ae2f 100644 --- a/cranelift/filetests/filetests/isa/riscv64/return-call.clif +++ b/cranelift/filetests/filetests/isa/riscv64/return-call.clif @@ -693,8 +693,11 @@ block2: ; addi s8, zero, 0x7d ; addi s7, zero, 0x82 ; addi s6, zero, 0x87 -; bnez a0, 0xb0 -; block2: ; offset 0xcc +; bnez a0, 8 +; j 0xc +; auipc t6, 0 +; jalr zero, t6, 0xb4 +; block2: ; offset 0xd8 ; addi a0, zero, 0x8c ; sd a2, 0x90(sp) ; sd a1, 0x98(sp) @@ -738,7 +741,7 @@ block2: ; ld s0, 0x80(sp) ; addi sp, sp, 0x90 ; jr t0 -; block3: ; offset 0x178 +; block3: ; offset 0x184 ; ld a0, 0x10(sp) ; sd a2, 0xa0(sp) ; sd a1, 0xa8(sp) diff --git a/cranelift/filetests/filetests/parser/cold.clif b/cranelift/filetests/filetests/parser/cold.clif new file mode 100644 index 000000000000..55064fe6ae3d --- /dev/null +++ b/cranelift/filetests/filetests/parser/cold.clif @@ -0,0 +1,12 @@ +test cat + +function %cold() cold { + sig0 = () cold + sig1 = () -> i8 cold +block1: + return +} + +; sameln: function %cold() cold { +; nextln: sig0 = () cold +; nextln: sig1 = () -> i8 cold diff --git a/cranelift/filetests/filetests/runtests/i128-bitcast.clif b/cranelift/filetests/filetests/runtests/i128-bitcast.clif new file mode 100644 index 000000000000..2b4cf930c6c6 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bitcast.clif 
@@ -0,0 +1,15 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target aarch64 +target x86_64 +target s390x +target riscv64 +target riscv64 has_c has_zcb + +function %bitcast_i128_i128(i128) -> i128 { +block0(v0: i128): + v1 = bitcast.i128 v0 + return v1 +} +; run: %bitcast_i128_i128(0) == 0 diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 9cf9116ff4e1..6068af8de3e2 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -1347,14 +1347,20 @@ impl<'a> Parser<'a> { } // The calling convention is optional. - if let Some(Token::Identifier(text)) = self.token() { - match text.parse() { + match self.token() { + Some(Token::Identifier(text)) => match text.parse() { Ok(cc) => { self.consume(); sig.call_conv = cc; } _ => return err!(self.loc, "unknown calling convention: {}", text), + }, + + Some(Token::Cold) => { + self.consume(); + sig.call_conv = CallConv::Cold; } + _ => {} } Ok(sig) diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index abdfa2cb6861..a910aacc3997 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -520,7 +520,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> { fn epoch_function_entry(&mut self, builder: &mut FunctionBuilder<'_>) { builder.declare_var(self.epoch_deadline_var, ir::types::I64); - self.epoch_load_deadline_into_var(builder); + // Let epoch_check_full load the current deadline and call def_var + builder.declare_var(self.epoch_ptr_var, self.pointer_type()); let epoch_ptr = self.epoch_ptr(builder); builder.def_var(self.epoch_ptr_var, epoch_ptr); @@ -542,7 +543,9 @@ impl<'module_environment> FuncEnvironment<'module_environment> { // sufficient. Then, combined with checks at every backedge // (loop) the longest runtime between checks is bounded by the // straightline length of any function body. 
- self.epoch_check(builder); + let continuation_block = builder.create_block(); + let cur_epoch_value = self.epoch_load_current(builder); + self.epoch_check_full(builder, cur_epoch_value, continuation_block); } #[cfg(feature = "wmemcheck")] @@ -607,33 +610,30 @@ impl<'module_environment> FuncEnvironment<'module_environment> { ) } - fn epoch_load_deadline_into_var(&mut self, builder: &mut FunctionBuilder<'_>) { - let deadline = - builder.ins().load( - ir::types::I64, - ir::MemFlags::trusted(), - self.vmruntime_limits_ptr, - ir::immediates::Offset32::new( - self.offsets.ptr.vmruntime_limits_epoch_deadline() as i32 - ), - ); - builder.def_var(self.epoch_deadline_var, deadline); + fn epoch_check(&mut self, builder: &mut FunctionBuilder<'_>) { + let continuation_block = builder.create_block(); + + // Load new epoch and check against the cached deadline. + let cur_epoch_value = self.epoch_load_current(builder); + self.epoch_check_cached(builder, cur_epoch_value, continuation_block); + + // At this point we've noticed that the epoch has exceeded our + // cached deadline. However the real deadline may have been + // updated (within another yield) during some function that we + // called in the meantime, so reload the cache and check again. + self.epoch_check_full(builder, cur_epoch_value, continuation_block); } - fn epoch_check(&mut self, builder: &mut FunctionBuilder<'_>) { + fn epoch_check_cached( + &mut self, + builder: &mut FunctionBuilder, + cur_epoch_value: ir::Value, + continuation_block: ir::Block, + ) { let new_epoch_block = builder.create_block(); - let new_epoch_doublecheck_block = builder.create_block(); - let continuation_block = builder.create_block(); builder.set_cold_block(new_epoch_block); - builder.set_cold_block(new_epoch_doublecheck_block); let epoch_deadline = builder.use_var(self.epoch_deadline_var); - // Load new epoch and check against cached deadline. 
The - // deadline may be out of date if it was updated (within - // another yield) during some function that we called; this is - // fine, as we'll reload it and check again before yielding in - // the cold path. - let cur_epoch_value = self.epoch_load_current(builder); let cmp = builder.ins().icmp( IntCC::UnsignedGreaterThanOrEqual, cur_epoch_value, @@ -644,31 +644,30 @@ impl<'module_environment> FuncEnvironment<'module_environment> { .brif(cmp, new_epoch_block, &[], continuation_block, &[]); builder.seal_block(new_epoch_block); - // In the "new epoch block", we've noticed that the epoch has - // exceeded our cached deadline. However the real deadline may - // have been moved in the meantime. We keep the cached value - // in a register to speed the checks in the common case - // (between epoch ticks) but we want to do a precise check - // here, on the cold path, by reloading the latest value - // first. builder.switch_to_block(new_epoch_block); - self.epoch_load_deadline_into_var(builder); - let fresh_epoch_deadline = builder.use_var(self.epoch_deadline_var); - let fresh_cmp = builder.ins().icmp( - IntCC::UnsignedGreaterThanOrEqual, - cur_epoch_value, - fresh_epoch_deadline, - ); - builder.ins().brif( - fresh_cmp, - new_epoch_doublecheck_block, - &[], - continuation_block, - &[], - ); - builder.seal_block(new_epoch_doublecheck_block); + } + + fn epoch_check_full( + &mut self, + builder: &mut FunctionBuilder, + cur_epoch_value: ir::Value, + continuation_block: ir::Block, + ) { + // We keep the deadline cached in a register to speed the checks + // in the common case (between epoch ticks) but we want to do a + // precise check here by reloading the cache first. 
+ let deadline = + builder.ins().load( + ir::types::I64, + ir::MemFlags::trusted(), + self.vmruntime_limits_ptr, + ir::immediates::Offset32::new( + self.offsets.ptr.vmruntime_limits_epoch_deadline() as i32 + ), + ); + builder.def_var(self.epoch_deadline_var, deadline); + self.epoch_check_cached(builder, cur_epoch_value, continuation_block); - builder.switch_to_block(new_epoch_doublecheck_block); let new_epoch = self.builtin_functions.new_epoch(builder.func); let vmctx = self.vmctx_val(&mut builder.cursor()); // new_epoch() returns the new deadline, so we don't have to diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index e380f077f043..1ecbcf181bf5 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -59,6 +59,7 @@ semver = { workspace = true, optional = true } smallvec = { workspace = true, optional = true } hashbrown = { workspace = true } libm = "0.2.7" +bitflags = { workspace = true } [target.'cfg(target_os = "windows")'.dependencies.windows-sys] workspace = true diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 042497f414b2..f8e3faa21066 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -2190,34 +2190,22 @@ impl Default for Config { impl fmt::Debug for Config { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut f = f.debug_struct("Config"); - f.field("debug_info", &self.tunables.generate_native_debuginfo) - .field( - "wasm_threads", - &self.features.contains(WasmFeatures::THREADS), - ) - .field( - "wasm_reference_types", - &self.features.contains(WasmFeatures::REFERENCE_TYPES), - ) - .field( - "wasm_function_references", - &self.features.contains(WasmFeatures::FUNCTION_REFERENCES), - ) - .field("wasm_gc", &self.features.contains(WasmFeatures::GC)) - .field( - "wasm_bulk_memory", - &self.features.contains(WasmFeatures::BULK_MEMORY), - ) - .field("wasm_simd", &self.features.contains(WasmFeatures::SIMD)) - .field( - "wasm_relaxed_simd", - 
&self.features.contains(WasmFeatures::RELAXED_SIMD), - ) - .field( - "wasm_multi_value", - &self.features.contains(WasmFeatures::MULTI_VALUE), - ) - .field("parallel_compilation", &self.parallel_compilation); + f.field("debug_info", &self.tunables.generate_native_debuginfo); + + // Not every flag in WasmFeatures can be enabled as part of creating + // a Config. This impl gives a complete picture of all WasmFeatures + // enabled, and doesn't require maintenance by hand (which has become out + // of date in the past), at the cost of possible confusion for why + // a flag in this set doesn't have a Config setter. + use bitflags::Flags; + for flag in WasmFeatures::FLAGS.iter() { + f.field( + &format!("wasm_{}", flag.name().to_lowercase()), + &self.features.contains(*flag.value()), + ); + } + + f.field("parallel_compilation", &self.parallel_compilation); #[cfg(any(feature = "cranelift", feature = "winch"))] { f.field("compiler_config", &self.compiler_config); @@ -2822,7 +2810,9 @@ impl PoolingAllocationConfig { /// The maximum byte size that any WebAssembly linear memory may grow to. /// - /// This option defaults to 10 MiB. + /// This option defaults to 4 GiB meaning that for 32-bit linear memories + /// there are no restrictions. 64-bit linear memories will not be allowed to + /// grow beyond 4 GiB by default. /// /// If a memory's minimum size is greater than this value, the module will /// fail to instantiate. @@ -2832,11 +2822,15 @@ impl PoolingAllocationConfig { /// instruction. /// /// This value is used to control the maximum accessible space for each - /// linear memory of a core instance. - /// - /// The reservation size of each linear memory is controlled by the - /// `static_memory_maximum_size` setting and this value cannot exceed the - /// configured static memory maximum size. + /// linear memory of a core instance. This can be thought of as a simple + /// mechanism like [`Store::limiter`](crate::Store::limiter) to limit memory + /// at runtime.
This value can also affect striping/coloring behavior when + /// used in conjunction with + /// [`memory_protection_keys`](PoolingAllocationConfig::memory_protection_keys). + /// + /// The virtual memory reservation size of each linear memory is controlled + /// by the [`Config::static_memory_maximum_size`] setting and this method's + /// configuration cannot exceed [`Config::static_memory_maximum_size`]. pub fn max_memory_size(&mut self, bytes: usize) -> &mut Self { self.config.limits.max_memory_size = bytes; self @@ -2853,6 +2847,11 @@ impl PoolingAllocationConfig { /// regions are accessible each time executions switches from host to guest /// (or vice versa). /// + /// Leveraging MPK requires configuring a smaller-than-default + /// [`max_memory_size`](PoolingAllocationConfig::max_memory_size) to enable + /// this coloring/striping behavior. For example embeddings might want to + /// reduce the default 4G allowance to 128M. + /// /// MPK is only available on Linux (called `pku` there) and recent x86 /// systems; we check for MPK support at runtime by examining the `CPUID` /// register. This configuration setting can be in three states: diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs index 5d5bbb93c9e4..ba42d061683e 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs @@ -166,7 +166,7 @@ impl Default for InstanceLimits { // have 10k+ elements. table_elements: 20_000, max_memories_per_module: 1, - max_memory_size: 10 * (1 << 20), // 10 MiB + max_memory_size: 1 << 32, // 4G, #[cfg(feature = "gc")] total_gc_heaps: 1000, } diff --git a/tests/disas/epoch-interruption-x86.wat b/tests/disas/epoch-interruption-x86.wat new file mode 100644 index 000000000000..b4f4719083c9 --- /dev/null +++ b/tests/disas/epoch-interruption-x86.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "compile" +;;! 
flags = ["-Wepoch-interruption=y"] + +(module (func (loop (br 0)))) + +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r10 +;; movq (%r10), %r10 +;; addq $0x30, %r10 +;; cmpq %rsp, %r10 +;; ja 0x7f +;; 18: subq $0x20, %rsp +;; movq %rbx, (%rsp) +;; movq %r12, 8(%rsp) +;; movq %r13, 0x10(%rsp) +;; movq 8(%rdi), %r12 +;; movq 0x20(%rdi), %rbx +;; movq %rdi, %r13 +;; movq (%rbx), %r9 +;; movq 0x10(%r12), %rax +;; cmpq %rax, %r9 +;; jae 0x57 +;; 46: movq (%rbx), %rdi +;; cmpq %rax, %rdi +;; jae 0x64 +;; jmp 0x46 +;; 57: movq %r13, %rdi +;; callq 0xdf +;; jmp 0x46 +;; 64: movq 0x10(%r12), %rax +;; cmpq %rax, %rdi +;; jb 0x46 +;; 72: movq %r13, %rdi +;; callq 0xdf +;; jmp 0x46 +;; 7f: ud2 diff --git a/tests/disas/epoch-interruption.wat b/tests/disas/epoch-interruption.wat new file mode 100644 index 000000000000..c11016ee7510 --- /dev/null +++ b/tests/disas/epoch-interruption.wat @@ -0,0 +1,47 @@ +;;! target = "x86_64" +;;! test = "optimize" +;;! flags = ["-Wepoch-interruption=y"] + +(module (func (loop (br 0)))) + +;; function u0:0(i64 vmctx, i64) tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1 +;; gv3 = vmctx +;; sig0 = (i64 vmctx) -> i64 system_v +;; fn0 = colocated u1:16 sig0 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64): +;; @0016 v3 = load.i64 notrap aligned v0+8 +;; @0016 v5 = load.i64 notrap aligned v0+32 +;; @0016 v6 = load.i64 notrap aligned v5 +;; @0016 v7 = load.i64 notrap aligned v3+16 +;; @0016 v8 = icmp uge v6, v7 +;; @0016 brif v8, block3, block2(v7) +;; +;; block3 cold: +;; @0016 v10 = call fn0(v0) +;; @0016 jump block2(v10) +;; +;; block2(v21: i64): +;; @0017 jump block4(v21) +;; +;; block4(v13: i64): +;; @0017 v12 = load.i64 notrap aligned v5 +;; @0017 v14 = icmp uge v12, v13 +;; @0017 brif v14, block7, block6(v13) +;; +;; block7 cold: +;; @0017 v15 = load.i64 notrap aligned v3+16 +;; @0017 v16 = icmp.i64 uge v12, v15 +;; @0017 brif v16, block8, 
block6(v15) +;; +;; block8 cold: +;; @0017 v18 = call fn0(v0) +;; @0017 jump block6(v18) +;; +;; block6(v22: i64): +;; @0019 jump block4(v22) +;; } diff --git a/tests/disas/x64-simd-test-and-branch.wat b/tests/disas/x64-simd-test-and-branch.wat new file mode 100644 index 000000000000..1bbceb0f8d20 --- /dev/null +++ b/tests/disas/x64-simd-test-and-branch.wat @@ -0,0 +1,125 @@ +;;! target = "x86_64" +;;! test = "compile" +;;! flags = ["-Ccranelift-sse41"] + +(module + (func $i8x16.all_true (param v128) (result i32) + local.get 0 + i8x16.all_true + if (result i32) + i32.const 100 + else + i32.const 200 + end + ) + + (func $i16x8.all_true (param v128) (result i32) + local.get 0 + i16x8.all_true + if (result i32) + i32.const 100 + else + i32.const 200 + end + ) + + (func $i32x4.all_true (param v128) (result i32) + local.get 0 + i32x4.all_true + if (result i32) + i32.const 100 + else + i32.const 200 + end + ) + + (func $i64x2.all_true (param v128) (result i32) + local.get 0 + i64x2.all_true + if (result i32) + i32.const 100 + else + i32.const 200 + end + ) + + (func $v128.any_true (param v128) (result i32) + local.get 0 + v128.any_true + if (result i32) + i32.const 100 + else + i32.const 200 + end + ) +) +;; wasm[0]::function[0]::i8x16.all_true: +;; pushq %rbp +;; movq %rsp, %rbp +;; pxor %xmm7, %xmm7 +;; pcmpeqb %xmm7, %xmm0 +;; ptest %xmm0, %xmm0 +;; je 0x21 +;; 17: movl $0xc8, %eax +;; jmp 0x26 +;; 21: movl $0x64, %eax +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[1]::i16x8.all_true: +;; pushq %rbp +;; movq %rsp, %rbp +;; pxor %xmm7, %xmm7 +;; pcmpeqw %xmm7, %xmm0 +;; ptest %xmm0, %xmm0 +;; je 0x61 +;; 57: movl $0xc8, %eax +;; jmp 0x66 +;; 61: movl $0x64, %eax +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[2]::i32x4.all_true: +;; pushq %rbp +;; movq %rsp, %rbp +;; pxor %xmm7, %xmm7 +;; pcmpeqd %xmm7, %xmm0 +;; ptest %xmm0, %xmm0 +;; je 0xa1 +;; 97: movl $0xc8, %eax +;; jmp 0xa6 +;; a1: movl $0x64, %eax +;; movq %rbp, %rsp 
+;; popq %rbp +;; retq +;; +;; wasm[0]::function[3]::i64x2.all_true: +;; pushq %rbp +;; movq %rsp, %rbp +;; pxor %xmm7, %xmm7 +;; pcmpeqq %xmm7, %xmm0 +;; ptest %xmm0, %xmm0 +;; je 0xe2 +;; d8: movl $0xc8, %eax +;; jmp 0xe7 +;; e2: movl $0x64, %eax +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[4]::v128.any_true: +;; pushq %rbp +;; movq %rsp, %rbp +;; pxor %xmm7, %xmm7 +;; pcmpeqb %xmm7, %xmm0 +;; pmovmskb %xmm0, %ecx +;; cmpl $0xffff, %ecx +;; jne 0x126 +;; 11c: movl $0xc8, %eax +;; jmp 0x12b +;; 126: movl $0x64, %eax +;; movq %rbp, %rsp +;; popq %rbp +;; retq diff --git a/tests/disas/x64-store-imm.wat b/tests/disas/x64-store-imm.wat new file mode 100644 index 000000000000..73ca4b9320bf --- /dev/null +++ b/tests/disas/x64-store-imm.wat @@ -0,0 +1,27 @@ +;;! target = "x86_64" +;;! test = "compile" + +(module + (global $g (mut i32) (i32.const 0)) + + (func $foo + (global.set $g (i32.const 0)) + (global.set $g (i32.const 1)) + (global.set $g (i32.const -1)) + (global.set $g (i32.const -10)) + (global.set $g (i32.const 100000)) + (global.set $g (i32.const 0x8fff_ffff)) + ) +) +;; wasm[0]::function[0]::foo: +;; pushq %rbp +;; movq %rsp, %rbp +;; movl $0, 0x60(%rdi) +;; movl $1, 0x60(%rdi) +;; movl $0xffffffff, 0x60(%rdi) +;; movl $0xfffffff6, 0x60(%rdi) +;; movl $0x186a0, 0x60(%rdi) +;; movl $0x8fffffff, 0x60(%rdi) +;; movq %rbp, %rsp +;; popq %rbp +;; retq diff --git a/tests/disas/x64-vector-patterns.wat b/tests/disas/x64-vector-patterns.wat new file mode 100644 index 000000000000..e043427c3bb6 --- /dev/null +++ b/tests/disas/x64-vector-patterns.wat @@ -0,0 +1,22 @@ +;;! target = "x86_64" +;;! 
test = "compile" + +(module + (func $zero (result v128) v128.const i64x2 0 0) + (func $ones (result v128) v128.const i64x2 -1 -1) +) +;; wasm[0]::function[0]::zero: +;; pushq %rbp +;; movq %rsp, %rbp +;; pxor %xmm0, %xmm0 +;; movq %rbp, %rsp +;; popq %rbp +;; retq +;; +;; wasm[0]::function[1]::ones: +;; pushq %rbp +;; movq %rsp, %rbp +;; pcmpeqd %xmm0, %xmm0 +;; movq %rbp, %rsp +;; popq %rbp +;; retq