Skip to content

Commit c596c7e

Browse files
teknoraveresmil
authored and committed
riscv: optimized memset
The generic memset is defined as a byte-at-a-time write. This is always safe, but it's slower than a 4-byte or even 8-byte write. Write a generic memset which fills the data one byte at a time until the destination is aligned, then fills using the largest size allowed, and finally fills the remaining data one byte at a time. Signed-off-by: Matteo Croce <mcroce@microsoft.com>
1 parent 5b21e2d commit c596c7e

File tree

6 files changed

+42
-135
lines changed

6 files changed

+42
-135
lines changed

arch/riscv/include/asm/string.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,10 @@
66
#ifndef _ASM_RISCV_STRING_H
77
#define _ASM_RISCV_STRING_H
88

9-
#include <linux/types.h>
10-
#include <linux/linkage.h>
11-
12-
#define __HAVE_ARCH_MEMSET
13-
extern asmlinkage void *memset(void *, int, size_t);
14-
extern asmlinkage void *__memset(void *, int, size_t);
15-
169
#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
10+
#define __HAVE_ARCH_MEMSET
11+
void *memset(void *s, int c, size_t count);
12+
void *__memset(void *s, int c, size_t count);
1713
#define __HAVE_ARCH_MEMCPY
1814
void *memcpy(void *dest, const void *src, size_t count);
1915
void *__memcpy(void *dest, const void *src, size_t count);

arch/riscv/kernel/Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ obj-y += syscall_table.o
3131
obj-y += sys_riscv.o
3232
obj-y += time.o
3333
obj-y += traps.o
34-
obj-y += riscv_ksyms.o
3534
obj-y += stacktrace.o
3635
obj-y += cacheinfo.o
3736
obj-y += patch.o

arch/riscv/kernel/riscv_ksyms.c

Lines changed: 0 additions & 13 deletions
This file was deleted.

arch/riscv/lib/Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22
lib-y += delay.o
3-
lib-y += memset.o
43
lib-$(CONFIG_MMU) += uaccess.o
54
lib-$(CONFIG_64BIT) += tishift.o
65
lib-$(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) += string.o

arch/riscv/lib/memset.S

Lines changed: 0 additions & 113 deletions
This file was deleted.

arch/riscv/lib/string.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,42 @@ EXPORT_SYMBOL(__memmove);
116116

117117
void *memmove(void *dest, const void *src, size_t count) __weak __alias(__memmove);
118118
EXPORT_SYMBOL(memmove);
119+
120+
void *__memset(void *s, int c, size_t count)
121+
{
122+
union types dest = { .as_u8 = s };
123+
124+
if (count >= MIN_THRESHOLD) {
125+
unsigned long cu = (unsigned long)c;
126+
127+
/* Compose an ulong with 'c' repeated 4/8 times */
128+
cu |= cu << 8;
129+
cu |= cu << 16;
130+
#if BITS_PER_LONG == 64
131+
cu |= cu << 32;
132+
#endif
133+
134+
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
135+
/*
136+
* Fill the buffer one byte at time until
137+
* the destination is word aligned.
138+
*/
139+
for (; count && dest.as_uptr & mask; count--)
140+
*dest.as_u8++ = c;
141+
}
142+
143+
/* Copy using the largest size allowed */
144+
for (; count >= bytes_long; count -= bytes_long)
145+
*dest.as_ulong++ = cu;
146+
}
147+
148+
/* copy the remainder */
149+
while (count--)
150+
*dest.as_u8++ = c;
151+
152+
return s;
153+
}
154+
EXPORT_SYMBOL(__memset);
155+
156+
/*
 * memset is declared as an alias of __memset and marked __weak;
 * presumably so that another definition of memset can take precedence
 * at link time while __memset stays reachable -- confirm against the
 * kernel's __weak/__alias definitions.
 */
void *memset(void *s, int c, size_t count) __weak __alias(__memset);
157+
EXPORT_SYMBOL(memset);

0 commit comments

Comments
 (0)