Skip to content

Commit 314354d

Browse files
committed
Fix suboptimal codegen in memset
1 parent c30322a commit 314354d

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

src/mem/x86_64.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
109109
#[inline(always)]
110110
#[cfg(not(target_feature = "ermsb"))]
111111
pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) {
112+
let c = c as u64 * 0x0101_0101_0101_0101;
112113
let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count);
113114
// Separating the blocks gives the compiler more freedom to reorder instructions.
114115
// It also allows us to trivially skip the rep stosb, which is faster when memsetting
@@ -118,23 +119,23 @@ pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) {
118119
"rep stosb",
119120
inout("ecx") pre_byte_count => _,
120121
inout("rdi") dest => dest,
121-
in("al") c,
122+
in("rax") c,
122123
options(nostack, preserves_flags)
123124
);
124125
}
125126
asm!(
126127
"rep stosq",
127128
inout("rcx") qword_count => _,
128129
inout("rdi") dest => dest,
129-
in("rax") (c as u64) * 0x0101010101010101,
130+
in("rax") c,
130131
options(nostack, preserves_flags)
131132
);
132133
if byte_count > 0 {
133134
asm!(
134135
"rep stosb",
135136
inout("ecx") byte_count => _,
136137
inout("rdi") dest => _,
137-
in("al") c,
138+
in("rax") c,
138139
options(nostack, preserves_flags)
139140
);
140141
}

0 commit comments

Comments
 (0)