Closed
Description
It seems the compiler only recognizes the 16-byte copy, but not the 4-byte copy. It also fails to recognize the 16-byte copy when the structs are constructed through a function call (`Bar::new`) instead of directly, and generates convoluted SIMD code instead.
Rust example code:
/// Tuple struct of four `u8` fields.
///
/// `#[repr(C)]` fixes the field order to declaration order, so the four
/// fields occupy four consecutive bytes.
#[repr(C)]
pub struct Bar(u8, u8, u8, u8);
impl Bar {
/// Builds a `Bar` from its four fields, in order.
///
/// This is a plain private function with no `#[inline]` attribute; it is the
/// only difference between `make_bar_bad_broken_inline` and `make_bar_good`.
fn new(a: u8, b: u8, c: u8, d: u8) -> Self {
Bar(a, b, c, d)
}
}
/// Builds a single `Bar` from the four bytes of `data`, field by field.
///
/// Semantically this is a 4-byte copy; the report is that the generated code
/// does not perform it as one.
// `#[no_mangle]` keeps the symbol name readable in the assembly listing.
#[no_mangle]
pub fn make_bar_bad(data: &[u8; 4]) -> Bar {
Bar(data[0], data[1], data[2], data[3])
}
/// Builds four `Bar`s from 16 bytes of `data`, constructing each one via
/// `Bar::new`.
///
/// The bytes are consumed in order (`data[0..4]` for the first `Bar`,
/// `data[4..8]` for the second, and so on), exactly as in `make_bar_good`;
/// the only difference is the call to `Bar::new` instead of the tuple-struct
/// constructor.
// `#[no_mangle]` keeps the symbol name readable in the assembly listing.
#[no_mangle]
pub fn make_bar_bad_broken_inline(data: &[u8; 16]) -> (Bar, Bar, Bar, Bar) {
(
Bar::new(data[0], data[1], data[2], data[3]),
Bar::new(data[4], data[5], data[6], data[7]),
Bar::new(data[8], data[9], data[10], data[11]),
Bar::new(data[12], data[13], data[14], data[15]),
)
}
/// Builds four `Bar`s from 16 bytes of `data`, using the tuple-struct
/// constructor directly.
///
/// The bytes are consumed in order (`data[0..4]` for the first `Bar`,
/// `data[4..8]` for the second, and so on). This is the baseline case whose
/// generated code is a single 16-byte load and store.
// `#[no_mangle]` keeps the symbol name readable in the assembly listing.
#[no_mangle]
pub fn make_bar_good(data: &[u8; 16]) -> (Bar, Bar, Bar, Bar) {
(
Bar(data[0], data[1], data[2], data[3]),
Bar(data[4], data[5], data[6], data[7]),
Bar(data[8], data[9], data[10], data[11]),
Bar(data[12], data[13], data[14], data[15]),
)
}
// Empty entry point; presumably present only so the snippet builds as a binary on the playground.
fn main() {}
Code generated in release mode from playground:
# Each of the four bytes at (%rdi) is loaded separately, shifted to its
# position, and OR-ed together into %eax, rather than one 32-bit load.
make_bar_bad:
# byte 3 -> bits 24..31
movzbl 3(%rdi), %ecx
shll $24, %ecx
# byte 2 -> bits 16..23
movzbl 2(%rdi), %edx
shll $16, %edx
# byte 1 -> bits 8..15
movzbl 1(%rdi), %esi
shll $8, %esi
# byte 0 -> bits 0..7, then merge the other three
movzbl (%rdi), %eax
orl %esi, %eax
orl %edx, %eax
orl %ecx, %eax
retq
# The 16 input bytes at (%rsi) are gathered one at a time, grouped by field
# index (bytes 3/7/11/15, then 2/6/10/14, then 1/5/9/13, then 0/4/8/12).
# Each group is widened to one byte per 32-bit lane, shifted to its byte
# position within the lane, and all four vectors are OR-ed together before a
# single 16-byte store to (%rdi).
make_bar_bad_broken_inline:
# Group for field 3: bytes 3, 7, 11, 15 packed into the low two words of %xmm0.
movzbl 11(%rsi), %eax
movzbl 15(%rsi), %ecx
shll $8, %ecx
orl %eax, %ecx
movzbl 3(%rsi), %eax
movzbl 7(%rsi), %edx
shll $8, %edx
orl %eax, %edx
pinsrw $0, %edx, %xmm0
pinsrw $1, %ecx, %xmm0
# %xmm1 = 0; interleaving with it zero-extends bytes -> words -> dwords.
pxor %xmm1, %xmm1
punpcklbw %xmm1, %xmm0
punpcklwd %xmm1, %xmm0
# Move each byte to bits 24..31 of its 32-bit lane.
pslld $24, %xmm0
# Group for field 2: bytes 2, 6, 10, 14 -> %xmm2, shifted to bits 16..23.
movzbl 10(%rsi), %eax
movzbl 14(%rsi), %ecx
shll $8, %ecx
orl %eax, %ecx
movzbl 2(%rsi), %eax
movzbl 6(%rsi), %edx
shll $8, %edx
orl %eax, %edx
pinsrw $0, %edx, %xmm2
pinsrw $1, %ecx, %xmm2
punpcklbw %xmm1, %xmm2
punpcklwd %xmm1, %xmm2
pslld $16, %xmm2
# Group for field 1: bytes 1, 5, 9, 13 -> %xmm3, shifted to bits 8..15.
movzbl 9(%rsi), %eax
movzbl 13(%rsi), %ecx
shll $8, %ecx
orl %eax, %ecx
movzbl 1(%rsi), %eax
movzbl 5(%rsi), %edx
shll $8, %edx
orl %eax, %edx
pinsrw $0, %edx, %xmm3
pinsrw $1, %ecx, %xmm3
punpcklbw %xmm1, %xmm3
punpcklwd %xmm1, %xmm3
pslld $8, %xmm3
# Group for field 0: bytes 0, 4, 8, 12 -> %xmm4, left in bits 0..7 (no shift).
movzbl 8(%rsi), %eax
movzbl 12(%rsi), %ecx
shll $8, %ecx
orl %eax, %ecx
movzbl (%rsi), %eax
movzbl 4(%rsi), %edx
shll $8, %edx
orl %eax, %edx
pinsrw $0, %edx, %xmm4
pinsrw $1, %ecx, %xmm4
punpcklbw %xmm1, %xmm4
punpcklwd %xmm1, %xmm4
# Merge the four per-field vectors into %xmm2.
por %xmm3, %xmm4
por %xmm0, %xmm2
por %xmm4, %xmm2
# Unaligned 16-byte store to (%rdi); %rdi is also returned in %rax
# (presumably the hidden return-slot pointer - confirm against the ABI).
movdqu %xmm2, (%rdi)
movq %rdi, %rax
retq
# Baseline: one unaligned 16-byte load from (%rsi) and one unaligned 16-byte
# store to (%rdi); %rdi is returned in %rax.
make_bar_good:
movups (%rsi), %xmm0
movups %xmm0, (%rdi)
movq %rdi, %rax
retq
Metadata
Metadata
Assignees
Labels
Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues. Category: An issue proposing an enhancement or a PR with one. Issue: Problems and improvements with respect to performance of generated code. Relevant to the compiler team, which will review and decide on the PR/issue.