Skip to content

Bad code generation for something that should be a simple memcpy #47997

Closed
@stbuehler

Description

@stbuehler

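The compiler seems to recognize only the 16-byte copy (`make_bar_good`) as a plain memory copy, but not the 4-byte copy (`make_bar_bad`). It also fails to recognize the 16-byte copy across function boundaries, i.e. when each struct is built through a separate constructor function (`Bar::new`, as in `make_bar_bad_broken_inline`), and instead emits a long sequence of single-byte loads, shifts and SSE shuffles.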

Rust example code:

/// Tuple struct of four `u8` fields.
///
/// `#[repr(C)]` requests C-compatible layout, so the fields are stored in
/// declaration order; with four one-byte fields the struct occupies four
/// consecutive bytes.
#[repr(C)]
pub struct Bar(u8, u8, u8, u8);

// Constructor used by `make_bar_bad_broken_inline` to route struct creation
// through a function call instead of a direct tuple-struct literal.
impl Bar {
    /// Builds a `Bar` whose fields are `a`, `b`, `c`, `d`, in that order.
    ///
    /// Does nothing beyond forwarding its arguments to the tuple-struct
    /// constructor.
    fn new(a: u8, b: u8, c: u8, d: u8) -> Self {
        Bar(a, b, c, d)
    }
}

/// Builds one `Bar` from the four bytes of `data`, field by field.
///
/// Semantically this is a 4-byte copy of `data` into the returned struct.
/// The assembly listing quoted in this report shows it compiled to four
/// separate byte loads combined with shifts and ORs instead.
///
/// `#[no_mangle]` keeps the symbol name readable in the generated assembly.
#[no_mangle]
pub fn make_bar_bad(data: &[u8; 4]) -> Bar {
    Bar(data[0], data[1], data[2], data[3])
}

/// Builds four `Bar`s from the 16 bytes of `data`, going through `Bar::new`.
///
/// Each `Bar` takes one consecutive 4-byte group of `data` (bytes 0-3, 4-7,
/// 8-11, 12-15). The result is the same as `make_bar_good`; the only source
/// difference is that each struct is created via the `Bar::new` constructor
/// rather than a direct `Bar(..)` literal. The assembly listing quoted in
/// this report shows this variant is not compiled to a single 16-byte copy.
#[no_mangle]
pub fn make_bar_bad_broken_inline(data: &[u8; 16]) -> (Bar, Bar, Bar, Bar) {
    (
        Bar::new(data[0], data[1], data[2], data[3]),
        Bar::new(data[4], data[5], data[6], data[7]),
        Bar::new(data[8], data[9], data[10], data[11]),
        Bar::new(data[12], data[13], data[14], data[15]),
    )
}

/// Builds four `Bar`s from the 16 bytes of `data` using direct `Bar(..)`
/// literals.
///
/// Each `Bar` takes one consecutive 4-byte group of `data` (bytes 0-3, 4-7,
/// 8-11, 12-15). This is the variant whose quoted assembly is a single
/// 16-byte load followed by a 16-byte store.
#[no_mangle]
pub fn make_bar_good(data: &[u8; 16]) -> (Bar, Bar, Bar, Bar) {
    (
        Bar(data[0], data[1], data[2], data[3]),
        Bar(data[4], data[5], data[6], data[7]),
        Bar(data[8], data[9], data[10], data[11]),
        Bar(data[12], data[13], data[14], data[15]),
    )
}

// Intentionally empty entry point; presumably present only so the snippet
// builds as a binary (the report says the code was compiled on the playground).
fn main() {}

Code generated in release mode from playground:

# make_bar_bad: reads the four bytes at (%rdi) one at a time (zero-extended),
# shifts bytes 3, 2 and 1 left by 24, 16 and 8 bits, and ORs everything into
# %eax, which holds the result at return. The net effect equals a single
# 32-bit load from (%rdi).
make_bar_bad:
	movzbl	3(%rdi), %ecx
	shll	$24, %ecx
	movzbl	2(%rdi), %edx
	shll	$16, %edx
	movzbl	1(%rdi), %esi
	shll	$8, %esi
	movzbl	(%rdi), %eax
	orl	%esi, %eax
	orl	%edx, %eax
	orl	%ecx, %eax
	retq

# make_bar_bad_broken_inline: reads 16 bytes from (%rsi) one byte at a time
# and writes a 16-byte result to (%rdi), returning %rdi in %rax.
# The bytes are gathered in four groups, one per byte position within each
# 4-byte element. Each group is widened to one 32-bit lane per element,
# shifted into position, and the four vectors are ORed together.
make_bar_bad_broken_inline:
	# Group 1: bytes 3, 7, 11, 15 -> %xmm0, each moved to bits 24..31 of its lane.
	movzbl	11(%rsi), %eax
	movzbl	15(%rsi), %ecx
	shll	$8, %ecx
	orl	%eax, %ecx
	movzbl	3(%rsi), %eax
	movzbl	7(%rsi), %edx
	shll	$8, %edx
	orl	%eax, %edx
	pinsrw	$0, %edx, %xmm0
	pinsrw	$1, %ecx, %xmm0
	# %xmm1 is zeroed and used as the zero source for the byte->word->dword widening.
	pxor	%xmm1, %xmm1
	punpcklbw	%xmm1, %xmm0
	punpcklwd	%xmm1, %xmm0
	pslld	$24, %xmm0
	# Group 2: bytes 2, 6, 10, 14 -> %xmm2, each moved to bits 16..23 of its lane.
	movzbl	10(%rsi), %eax
	movzbl	14(%rsi), %ecx
	shll	$8, %ecx
	orl	%eax, %ecx
	movzbl	2(%rsi), %eax
	movzbl	6(%rsi), %edx
	shll	$8, %edx
	orl	%eax, %edx
	pinsrw	$0, %edx, %xmm2
	pinsrw	$1, %ecx, %xmm2
	punpcklbw	%xmm1, %xmm2
	punpcklwd	%xmm1, %xmm2
	pslld	$16, %xmm2
	# Group 3: bytes 1, 5, 9, 13 -> %xmm3, each moved to bits 8..15 of its lane.
	movzbl	9(%rsi), %eax
	movzbl	13(%rsi), %ecx
	shll	$8, %ecx
	orl	%eax, %ecx
	movzbl	1(%rsi), %eax
	movzbl	5(%rsi), %edx
	shll	$8, %edx
	orl	%eax, %edx
	pinsrw	$0, %edx, %xmm3
	pinsrw	$1, %ecx, %xmm3
	punpcklbw	%xmm1, %xmm3
	punpcklwd	%xmm1, %xmm3
	pslld	$8, %xmm3
	# Group 4: bytes 0, 4, 8, 12 -> %xmm4, left in bits 0..7 of each lane (no shift).
	movzbl	8(%rsi), %eax
	movzbl	12(%rsi), %ecx
	shll	$8, %ecx
	orl	%eax, %ecx
	movzbl	(%rsi), %eax
	movzbl	4(%rsi), %edx
	shll	$8, %edx
	orl	%eax, %edx
	pinsrw	$0, %edx, %xmm4
	pinsrw	$1, %ecx, %xmm4
	punpcklbw	%xmm1, %xmm4
	punpcklwd	%xmm1, %xmm4
	# Merge the four partial vectors and store the 16-byte result (unaligned store).
	por	%xmm3, %xmm4
	por	%xmm0, %xmm2
	por	%xmm4, %xmm2
	movdqu	%xmm2, (%rdi)
	movq	%rdi, %rax
	retq

# make_bar_good: one unaligned 16-byte load from (%rsi) into %xmm0, one
# unaligned 16-byte store to (%rdi), then returns %rdi in %rax.
make_bar_good:
	movups	(%rsi), %xmm0
	movups	%xmm0, (%rdi)
	movq	%rdi, %rax
	retq

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
    C-enhancement — Category: An issue proposing an enhancement or a PR with one.
    I-slow — Issue: Problems and improvements with respect to performance of generated code.
    T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions