From 8c089bd5848112b91c1f2f836031b4593b44716f Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Fri, 9 Aug 2024 08:41:48 +0000 Subject: [PATCH] Implement m,emory fence --- sljit_src/sljitLir.c | 6 +++--- sljit_src/sljitLir.h | 13 ++++++++++--- sljit_src/sljitNativeARM_32.c | 12 +++++++++++- sljit_src/sljitNativeARM_64.c | 4 ++++ sljit_src/sljitNativeARM_T2_32.c | 4 ++++ sljit_src/sljitNativeMIPS_common.c | 8 ++++++++ sljit_src/sljitNativePPC_common.c | 4 ++++ sljit_src/sljitNativeRISCV_common.c | 4 ++++ sljit_src/sljitNativeS390X.c | 3 +++ sljit_src/sljitNativeX86_common.c | 9 +++++++++ test_src/sljitTest.c | 3 +++ 11 files changed, 63 insertions(+), 7 deletions(-) diff --git a/sljit_src/sljitLir.c b/sljit_src/sljitLir.c index 8e0a2040..ac726ccb 100644 --- a/sljit_src/sljitLir.c +++ b/sljit_src/sljitLir.c @@ -1148,7 +1148,7 @@ static void sljit_verbose_vparam(struct sljit_compiler *compiler, sljit_s32 p, s static const char* op0_names[] = { "breakpoint", "nop", "lmul.uw", "lmul.sw", "divmod.u", "divmod.s", "div.u", "div.s", - "endbr", "skip_frames_before_return" + "memory_barrier", "endbr", "skip_frames_before_return" }; static const char* op1_names[] = { @@ -1507,8 +1507,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) || ((op & ~SLJIT_32) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_32) <= SLJIT_DIV_SW) - || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); - CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) < SLJIT_LMUL_UW || SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_ENDBR || compiler->scratches >= 2); + || (op >= SLJIT_MEMORY_BARRIER && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) < SLJIT_LMUL_UW || SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_MEMORY_BARRIER || compiler->scratches >= 2); if ((SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_LMUL_UW && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN) compiler->last_flags = 0; #endif diff --git a/sljit_src/sljitLir.h b/sljit_src/sljitLir.h index d68cf27f..6f390c28 100644 --- a/sljit_src/sljitLir.h +++ b/sljit_src/sljitLir.h @@ -762,7 +762,9 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler and SLJIT_FR1 are overwritten. */ #define SLJIT_SIMD_REGS_ARE_PAIRS 13 /* [Not emulated] Atomic support is available. */ -#define SLJIT_HAS_ATOMIC 14 +#define SLJIT_HAS_ATOMIC 14 +/* [Not emulated] Memory barrier support is available. */ +#define SLJIT_HAS_MEMORY_BARRIER 15 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) /* [Not emulated] AVX support is available on x86. */ @@ -1169,16 +1171,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c the behaviour is undefined. */ #define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) #define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32) +/* Flags: - (does not modify flags) + May return with SLJIT_ERR_UNSUPPORTED if SLJIT_HAS_MEMORY_BARRIER + feature is not supported (calling sljit_has_cpu_feature() with + this feature option returns with 0). */ +#define SLJIT_MEMORY_BARRIER (SLJIT_OP0_BASE + 8) /* Flags: - (does not modify flags) ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64 when Intel Control-flow Enforcement Technology (CET) is enabled. No instructions are emitted for other architectures. */ -#define SLJIT_ENDBR (SLJIT_OP0_BASE + 8) +#define SLJIT_ENDBR (SLJIT_OP0_BASE + 9) /* Flags: - (may destroy flags) Skip stack frames before return when Intel Control-flow Enforcement Technology (CET) is enabled. No instructions are emitted for other architectures. */ -#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 9) +#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 10) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op); diff --git a/sljit_src/sljitNativeARM_32.c b/sljit_src/sljitNativeARM_32.c index 88bf988c..b22894b7 100644 --- a/sljit_src/sljitNativeARM_32.c +++ b/sljit_src/sljitNativeARM_32.c @@ -114,6 +114,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define CLZ 0xe16f0f10 #define CMN 0xe1600000 #define CMP 0xe1400000 +#define DMB_SY 0xf57ff05f #define EOR 0xe0200000 #define LDR 0xe5100000 #define LDR_POST 0xe4100000 @@ -365,7 +366,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ while (last_pc_patch < code_ptr) { /* Data transfer instruction with Rn == r15. */ - if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) { + if ((*last_pc_patch & 0x0e4f0000) == 0x040f0000) { diff = (sljit_uw)(const_pool - last_pc_patch); ind = (*last_pc_patch) & 0xfff; @@ -1132,6 +1133,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + case SLJIT_HAS_MEMORY_BARRIER: +#endif /* SLJIT_CONFIG_ARM_V7 */ return 1; case SLJIT_HAS_CTZ: @@ -2373,6 +2377,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; + case SLJIT_MEMORY_BARRIER: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return push_inst(compiler, DMB_SY); +#else /* !SLJIT_CONFIG_ARM_V7 */ + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_ARM_V7 */ case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; diff --git a/sljit_src/sljitNativeARM_64.c b/sljit_src/sljitNativeARM_64.c index a72ba78c..c3f82bc6 100644 --- a/sljit_src/sljitNativeARM_64.c +++ b/sljit_src/sljitNativeARM_64.c @@ -91,6 +91,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define CLZ 0xdac01000 #define CSEL 0x9a800000 #define CSINC 0x9a800400 +#define DMB_SY 0xd5033fbf #define DUP_e 0x0e000400 #define DUP_g 0x0e000c00 #define EOR 0xca000000 @@ -594,6 +595,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; default: @@ -1563,6 +1565,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_DIV_UW: case SLJIT_DIV_SW: return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, DMB_SY); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; diff --git a/sljit_src/sljitNativeARM_T2_32.c b/sljit_src/sljitNativeARM_T2_32.c index fa407b72..233e1327 100644 --- a/sljit_src/sljitNativeARM_T2_32.c +++ b/sljit_src/sljitNativeARM_T2_32.c @@ -138,6 +138,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define CMPI_W 0xf1b00f00 #define CMP_X 0x4500 #define CMP_W 0xebb00f00 +#define DMB_SY 0xf3bf8f5f #define EORI 0xf0800000 #define EORS 0x4040 #define EOR_W 0xea800000 @@ -695,6 +696,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; default: @@ -1914,6 +1916,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile } return SLJIT_SUCCESS; #endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */ + case SLJIT_MEMORY_BARRIER: + return push_inst32(compiler, DMB_SY); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; diff --git a/sljit_src/sljitNativeMIPS_common.c b/sljit_src/sljitNativeMIPS_common.c index 80700661..3f0a2658 100644 --- a/sljit_src/sljitNativeMIPS_common.c +++ b/sljit_src/sljitNativeMIPS_common.c @@ -312,6 +312,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define SWL (HI(42)) #define SWR (HI(46)) #define SWC1 (HI(57)) +#define SYNC (HI(0) | LO(15)) #define TRUNC_W_S (HI(17) | FMT_S | LO(13)) #if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 #define WSBH (HI(31) | (2 << 6) | LO(32)) @@ -862,6 +863,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_CMOV: case SLJIT_HAS_PREFETCH: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; case SLJIT_HAS_CTZ: @@ -2478,6 +2480,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); #endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_MEMORY_BARRIER: +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SYNC, UNMOVABLE_INS); +#else /* SLJIT_MIPS_REV < 1 */ + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_MIPS_REV >= 1 */ case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; diff --git a/sljit_src/sljitNativePPC_common.c b/sljit_src/sljitNativePPC_common.c index 18a364ec..8c3e4224 100644 --- a/sljit_src/sljitNativePPC_common.c +++ b/sljit_src/sljitNativePPC_common.c @@ -249,6 +249,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SUBFC (HI(31) | LO(8)) #define SUBFE (HI(31) | LO(136)) #define SUBFIC (HI(8)) +#define SYNC (HI(31) | LO(598)) #define XOR (HI(31) | LO(316)) #define XORI (HI(26)) #define XORIS (HI(27)) @@ -753,6 +754,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_ROT: case SLJIT_HAS_PREFETCH: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; case SLJIT_HAS_CTZ: @@ -1413,6 +1415,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #else return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); #endif + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, SYNC); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; diff --git a/sljit_src/sljitNativeRISCV_common.c b/sljit_src/sljitNativeRISCV_common.c index 5d6d5a60..e487b79b 100644 --- a/sljit_src/sljitNativeRISCV_common.c +++ b/sljit_src/sljitNativeRISCV_common.c @@ -116,6 +116,7 @@ static const sljit_u8 vreg_map[SLJIT_NUMBER_OF_VECTOR_REGISTERS + 3] = { #define EBREAK (F12(0x1) | F3(0x0) | OPC(0x73)) #define FADD_S (F7(0x0) | F3(0x7) | OPC(0x53)) #define FDIV_S (F7(0xc) | F3(0x7) | OPC(0x53)) +#define FENCE (F3(0x0) | OPC(0xf)) #define FEQ_S (F7(0x50) | F3(0x2) | OPC(0x53)) #define FLD (F3(0x3) | OPC(0x7)) #define FLE_S (F7(0x50) | F3(0x0) | OPC(0x53)) @@ -727,6 +728,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F64: #endif /* !SLJIT_CONFIG_RISCV_64 */ case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: #ifdef __riscv_vector case SLJIT_HAS_SIMD: #endif @@ -2056,6 +2058,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); case SLJIT_DIV_SW: return push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, FENCE | 0x0ff00000); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; diff --git a/sljit_src/sljitNativeS390X.c b/sljit_src/sljitNativeS390X.c index 5f2a5f2c..3095d409 100644 --- a/sljit_src/sljitNativeS390X.c +++ b/sljit_src/sljitNativeS390X.c @@ -1638,6 +1638,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F64: case SLJIT_HAS_SIMD: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; case SLJIT_HAS_CTZ: @@ -1955,6 +1956,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */ return SLJIT_SUCCESS; + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, 0x0700 /* bcr */ | (0xe << 4) | 0); case SLJIT_ENDBR: return SLJIT_SUCCESS; case SLJIT_SKIP_FRAMES_BEFORE_RETURN: diff --git a/sljit_src/sljitNativeX86_common.c b/sljit_src/sljitNativeX86_common.c index c64bc01d..496f80c5 100644 --- a/sljit_src/sljitNativeX86_common.c +++ b/sljit_src/sljitNativeX86_common.c @@ -1038,6 +1038,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; #if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE @@ -1497,6 +1498,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #endif break; + case SLJIT_MEMORY_BARRIER: + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + inst[0] = GROUP_0F; + inst[1] = 0xae; + inst[2] = 0xf0; + return SLJIT_SUCCESS; case SLJIT_ENDBR: return emit_endbranch(compiler); case SLJIT_SKIP_FRAMES_BEFORE_RETURN: diff --git a/test_src/sljitTest.c b/test_src/sljitTest.c index e3e3d2c4..fda719bd 100644 --- a/test_src/sljitTest.c +++ b/test_src/sljitTest.c @@ -2559,6 +2559,9 @@ static void test25(void) /* buf[3] */ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 3 * sizeof(sljit_sw), SLJIT_S4, 0); + if (sljit_has_cpu_feature(SLJIT_HAS_MEMORY_BARRIER) != 0) + SLJIT_ASSERT(sljit_emit_op0(compiler, SLJIT_MEMORY_BARRIER) != SLJIT_ERR_UNSUPPORTED); + const1 = sljit_emit_const(compiler, SLJIT_S3, 0, 0); sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_S3, 0); sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_S3, 0, SLJIT_S3, 0, SLJIT_IMM, 100);