Skip to content

Commit 93246e1

Browse files
author
wxue1
committed
cacheline demote to improve cache performance
After the JITTed code is generated, cacheline demote would give a hint to hardware to push out the cache line that contains the linear address. This gets nearly 1% performance gain on our workload. Signed-off-by: Xue,Wang <xue1.wang@intel.com> Signed-off-by: Tao,Su <tao.su@intel.com> Signed-off-by: Hu,chen <hu1.chen@intel.com>
1 parent 964f494 commit 93246e1

File tree

2 files changed

+46
-0
lines changed

2 files changed

+46
-0
lines changed

Zend/zend_cpuinfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,4 +220,15 @@ static inline int zend_cpu_supports_pclmul(void) {
220220
}
221221
#endif
222222

223+
/* __builtin_cpu_supports has cldemote from gcc11 */
224+
#if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000)
225+
ZEND_NO_SANITIZE_ADDRESS
226+
static inline int zend_cpu_supports_cldemote(void) {
227+
#if PHP_HAVE_BUILTIN_CPU_INIT
228+
__builtin_cpu_init();
229+
#endif
230+
return __builtin_cpu_supports("cldemote");
231+
}
232+
#endif
233+
223234
#endif

ext/opcache/jit/zend_jit.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,30 @@ static zend_jit_trace_info *zend_jit_get_current_trace_info(void);
137137
static uint32_t zend_jit_trace_find_exit_point(const void* addr);
138138
#endif
139139

140+
#if ZEND_JIT_TARGET_X86 && defined(__linux__)
141+
# if defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000)
142+
# define ZEND_JIT_SUPPORT_CLDEMOTE 1
143+
# else
144+
# define ZEND_JIT_SUPPORT_CLDEMOTE 0
145+
# endif
146+
#endif
147+
148+
#if ZEND_JIT_SUPPORT_CLDEMOTE
149+
#include <immintrin.h>
150+
#pragma GCC push_options
151+
#pragma GCC target("cldemote")
152+
static int cpu_support_cldemote = 0;
153+
static inline void shared_cacheline_demote(uintptr_t start, size_t size) {
154+
uintptr_t cache_line_base = start & ~0x3F;
155+
do {
156+
_cldemote((void *)cache_line_base);
157+
// next cacheline start size
158+
cache_line_base += 64;
159+
} while (cache_line_base < start + size);
160+
}
161+
#pragma GCC pop_options
162+
#endif
163+
140164
static int zend_jit_assign_to_variable(dasm_State **Dst,
141165
const zend_op *opline,
142166
zend_jit_addr var_use_addr,
@@ -972,6 +996,13 @@ static void *dasm_link_and_encode(dasm_State **dasm_state,
972996
/* flush the hardware I-cache */
973997
JIT_CACHE_FLUSH(entry, entry + size);
974998

999+
/* hint to the hardware to push out the cache line that contains the linear address */
1000+
#if ZEND_JIT_SUPPORT_CLDEMOTE
1001+
if (cpu_support_cldemote && JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) {
1002+
shared_cacheline_demote((uintptr_t)entry, size);
1003+
}
1004+
#endif
1005+
9751006
if (trace_num) {
9761007
zend_jit_trace_add_code(entry, dasm_getpclabel(dasm_state, 1));
9771008
}
@@ -4931,6 +4962,10 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached)
49314962
}
49324963
#endif
49334964

4965+
#if ZEND_JIT_SUPPORT_CLDEMOTE
4966+
cpu_support_cldemote = zend_cpu_supports_cldemote();
4967+
#endif
4968+
49344969
dasm_buf = buf;
49354970
dasm_size = size;
49364971

0 commit comments

Comments
 (0)