Closed
Description
The following example (godbolt):
/// A single meaningful byte. `repr(align(128))` raises the alignment to 128,
/// so the size is rounded up to 128 as well and the remaining 127 bytes are
/// trailing padding.
#[repr(align(128))]
#[derive(Copy, Clone)]
pub struct A(u8);

/// Returns a copy of `*x` made with a plain dereference.
pub unsafe fn foo(x: &A) -> A {
    *x
}

/// Returns a copy of `*x` made with an explicit `ptr::copy_nonoverlapping`
/// into uninitialized storage.
pub unsafe fn bar(x: &A) -> A {
    // `MaybeUninit` replaces the deprecated `mem::uninitialized`, which is
    // undefined behaviour for a type containing an integer field.
    let mut y = std::mem::MaybeUninit::<A>::uninit();
    std::ptr::copy_nonoverlapping(x as *const A, y.as_mut_ptr(), 1);
    // The copy above wrote one complete `A` into `y`, so it is initialized.
    y.assume_init()
}
produces the following LLVM-IR:
%A = type { [0 x i8], i8, [127 x i8] }
define void @_ZN7example3foo17h89c067fa0b9a17bcE(%A* noalias nocapture sret dereferenceable(128), %A* noalias nocapture readonly align 128 dereferenceable(128) %x) unnamed_addr #0 {
%1 = getelementptr inbounds %A, %A* %0, i64 0, i32 0, i64 0
%2 = getelementptr inbounds %A, %A* %x, i64 0, i32 0, i64 0
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 128 %1, i8* nonnull align 128 %2, i64 128, i1 false)
ret void
}
define void @_ZN7example3bar17hd5d27715385ba486E(%A* noalias nocapture sret dereferenceable(128), %A* noalias nocapture readonly align 128 dereferenceable(128) %x) unnamed_addr #0 {
%1 = getelementptr inbounds %A, %A* %x, i64 0, i32 0, i64 0
%2 = getelementptr inbounds %A, %A* %0, i64 0, i32 0, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 128 %2, i8* nonnull align 128 %1, i64 128, i1 false) #2
ret void
}
and machine code:
example::foo:
mov rax, rdi
movaps xmm0, xmmword ptr [rsi + 112]
movaps xmmword ptr [rdi + 112], xmm0
movaps xmm0, xmmword ptr [rsi + 96]
movaps xmmword ptr [rdi + 96], xmm0
movaps xmm0, xmmword ptr [rsi + 80]
movaps xmmword ptr [rdi + 80], xmm0
movaps xmm0, xmmword ptr [rsi + 64]
movaps xmmword ptr [rdi + 64], xmm0
movaps xmm0, xmmword ptr [rsi]
movaps xmm1, xmmword ptr [rsi + 16]
movaps xmm2, xmmword ptr [rsi + 32]
movaps xmm3, xmmword ptr [rsi + 48]
movaps xmmword ptr [rdi + 48], xmm3
movaps xmmword ptr [rdi + 32], xmm2
movaps xmmword ptr [rdi + 16], xmm1
movaps xmmword ptr [rdi], xmm0
ret
example::bar:
mov rax, rdi
movaps xmm0, xmmword ptr [rsi + 112]
movaps xmmword ptr [rdi + 112], xmm0
movaps xmm0, xmmword ptr [rsi + 96]
movaps xmmword ptr [rdi + 96], xmm0
movaps xmm0, xmmword ptr [rsi + 80]
movaps xmmword ptr [rdi + 80], xmm0
movaps xmm0, xmmword ptr [rsi + 64]
movaps xmmword ptr [rdi + 64], xmm0
movaps xmm0, xmmword ptr [rsi]
movaps xmm1, xmmword ptr [rsi + 16]
movaps xmm2, xmmword ptr [rsi + 32]
movaps xmm3, xmmword ptr [rsi + 48]
movaps xmmword ptr [rdi + 48], xmm3
movaps xmmword ptr [rdi + 32], xmm2
movaps xmmword ptr [rdi + 16], xmm1
movaps xmmword ptr [rdi], xmm0
ret
In both functions, all 128 bytes are copied every time a value of type A is moved, copied, or read.
However, only a single byte actually needs to be copied, since the remaining 127 bytes are trailing padding. The expected machine code is (godbolt):
example::foo:
mov al, byte ptr [rdi]
ret
example::bar:
mov al, byte ptr [rdi]
ret
Metadata
Metadata
Assignees
Labels
Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues. Category: An issue proposing an enhancement or a PR with one. Issue: Problems and improvements with respect to performance of generated code. Relevant to the compiler team, which will review and decide on the PR/issue.