Skip to content

Commit 1ba3cbf

Browse files
Peng Zhang authored and akpm00 committed
mm: kfence: improve the performance of __kfence_alloc() and __kfence_free()
In __kfence_alloc() and __kfence_free(), we will set and check canary. Assuming that the size of the object is close to 0, nearly 4k memory accesses are required because setting and checking canary is executed byte by byte. canary is now defined like this: KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7)) Observe that canary is only related to the lower three bits of the address, so every 8 bytes of canary are the same. We can access an 8-byte canary each time instead of byte-by-byte, thereby optimizing nearly 4k memory accesses to 4k/8 times. Using the bcc tool funclatency to measure the latency of __kfence_alloc() and __kfence_free(), the numbers (with the latency distribution omitted) are posted below. Though different object sizes will have an impact on the measurement, we ignore it for now and assume the average object size is roughly equal. Before patching: __kfence_alloc: avg = 5055 nsecs, total: 5515252 nsecs, count: 1091 __kfence_free: avg = 5319 nsecs, total: 9735130 nsecs, count: 1830 After patching: __kfence_alloc: avg = 3597 nsecs, total: 6428491 nsecs, count: 1787 __kfence_free: avg = 3046 nsecs, total: 3415390 nsecs, count: 1121 The numbers indicate that there is a ~30% - ~40% performance improvement. Link: https://lkml.kernel.org/r/20230403122738.6006-1-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com> Reviewed-by: Marco Elver <elver@google.com> Cc: Alexander Potapenko <glider@google.com> Cc: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 141fdee commit 1ba3cbf

File tree

3 files changed

+59
-23
lines changed

3 files changed

+59
-23
lines changed

mm/kfence/core.c

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -297,20 +297,13 @@ metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state nex
297297
WRITE_ONCE(meta->state, next);
298298
}
299299

300-
/* Write canary byte to @addr. */
301-
static inline bool set_canary_byte(u8 *addr)
302-
{
303-
*addr = KFENCE_CANARY_PATTERN(addr);
304-
return true;
305-
}
306-
307300
/* Check canary byte at @addr. */
308301
static inline bool check_canary_byte(u8 *addr)
309302
{
310303
struct kfence_metadata *meta;
311304
unsigned long flags;
312305

313-
if (likely(*addr == KFENCE_CANARY_PATTERN(addr)))
306+
if (likely(*addr == KFENCE_CANARY_PATTERN_U8(addr)))
314307
return true;
315308

316309
atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
@@ -323,32 +316,67 @@ static inline bool check_canary_byte(u8 *addr)
323316
return false;
324317
}
325318

326-
/* __always_inline this to ensure we won't do an indirect call to fn. */
327-
static __always_inline void for_each_canary(const struct kfence_metadata *meta, bool (*fn)(u8 *))
319+
static inline void set_canary(const struct kfence_metadata *meta)
328320
{
329321
const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
330-
unsigned long addr;
322+
unsigned long addr = pageaddr;
323+
324+
/*
325+
* The canary may be written to part of the object memory, but it does
326+
* not affect it. The user should initialize the object before using it.
327+
*/
328+
for (; addr < meta->addr; addr += sizeof(u64))
329+
*((u64 *)addr) = KFENCE_CANARY_PATTERN_U64;
330+
331+
addr = ALIGN_DOWN(meta->addr + meta->size, sizeof(u64));
332+
for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64))
333+
*((u64 *)addr) = KFENCE_CANARY_PATTERN_U64;
334+
}
335+
336+
static inline void check_canary(const struct kfence_metadata *meta)
337+
{
338+
const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
339+
unsigned long addr = pageaddr;
331340

332341
/*
333-
* We'll iterate over each canary byte per-side until fn() returns
334-
* false. However, we'll still iterate over the canary bytes to the
342+
* We'll iterate over each canary byte per-side until a corrupted byte
343+
* is found. However, we'll still iterate over the canary bytes to the
335344
* right of the object even if there was an error in the canary bytes to
336345
* the left of the object. Specifically, if check_canary_byte()
337346
* generates an error, showing both sides might give more clues as to
338347
* what the error is about when displaying which bytes were corrupted.
339348
*/
340349

341350
/* Apply to left of object. */
342-
for (addr = pageaddr; addr < meta->addr; addr++) {
343-
if (!fn((u8 *)addr))
351+
for (; meta->addr - addr >= sizeof(u64); addr += sizeof(u64)) {
352+
if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64))
344353
break;
345354
}
346355

347-
/* Apply to right of object. */
348-
for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) {
349-
if (!fn((u8 *)addr))
356+
/*
357+
* If the canary is corrupted in a certain 64 bytes, or the canary
358+
* memory cannot be completely covered by multiple consecutive 64 bytes,
359+
* it needs to be checked one by one.
360+
*/
361+
for (; addr < meta->addr; addr++) {
362+
if (unlikely(!check_canary_byte((u8 *)addr)))
350363
break;
351364
}
365+
366+
/* Apply to right of object. */
367+
for (addr = meta->addr + meta->size; addr % sizeof(u64) != 0; addr++) {
368+
if (unlikely(!check_canary_byte((u8 *)addr)))
369+
return;
370+
}
371+
for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64)) {
372+
if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64)) {
373+
374+
for (; addr - pageaddr < PAGE_SIZE; addr++) {
375+
if (!check_canary_byte((u8 *)addr))
376+
return;
377+
}
378+
}
379+
}
352380
}
353381

354382
static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
@@ -434,7 +462,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
434462
#endif
435463

436464
/* Memory initialization. */
437-
for_each_canary(meta, set_canary_byte);
465+
set_canary(meta);
438466

439467
/*
440468
* We check slab_want_init_on_alloc() ourselves, rather than letting
@@ -495,7 +523,7 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
495523
alloc_covered_add(meta->alloc_stack_hash, -1);
496524

497525
/* Check canary bytes for memory corruption. */
498-
for_each_canary(meta, check_canary_byte);
526+
check_canary(meta);
499527

500528
/*
501529
* Clear memory if init-on-free is set. While we protect the page, the
@@ -751,7 +779,7 @@ static void kfence_check_all_canary(void)
751779
struct kfence_metadata *meta = &kfence_metadata[i];
752780

753781
if (meta->state == KFENCE_OBJECT_ALLOCATED)
754-
for_each_canary(meta, check_canary_byte);
782+
check_canary(meta);
755783
}
756784
}
757785

mm/kfence/kfence.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,15 @@
2121
* lower 3 bits of the address, to detect memory corruptions with higher
2222
* probability, where similar constants are used.
2323
*/
24-
#define KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
24+
#define KFENCE_CANARY_PATTERN_U8(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
25+
26+
/*
27+
* Define a continuous 8-byte canary starting from a multiple of 8. The canary
28+
* of each byte is only related to the lowest three bits of its address, so the
29+
* canary of every 8 bytes is the same. 64-bit memory can be filled and checked
30+
* at a time instead of byte by byte to improve performance.
31+
*/
32+
#define KFENCE_CANARY_PATTERN_U64 ((u64)0xaaaaaaaaaaaaaaaa ^ (u64)(0x0706050403020100))
2533

2634
/* Maximum stack depth for reports. */
2735
#define KFENCE_STACK_DEPTH 64

mm/kfence/report.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ static void print_diff_canary(unsigned long address, size_t bytes_to_show,
168168

169169
pr_cont("[");
170170
for (cur = (const u8 *)address; cur < end; cur++) {
171-
if (*cur == KFENCE_CANARY_PATTERN(cur))
171+
if (*cur == KFENCE_CANARY_PATTERN_U8(cur))
172172
pr_cont(" .");
173173
else if (no_hash_pointers)
174174
pr_cont(" 0x%02x", *cur);

0 commit comments

Comments (0)