Skip to content

[1.; 10] generates worse code than [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.] #56333

Closed
@jrmuizel

Description

@jrmuizel

Here's an example

/// Wrapper around a fixed-size array of ten `f64` values.
pub struct L {
    // Ten 8-byte floats, i.e. 80 bytes of payload.
    a: [f64; 10],
}

/// A handle to a slot of type `T`, held as an exclusive borrow for `'a`.
pub struct Allocation<'a, T>
where
    T: 'a,
{
    /// The destination that `init` overwrites.
    f: &'a mut T,
}

impl<'a, T> Allocation<'a, T> {
    /// Consumes the handle and stores `value` into the borrowed slot,
    /// replacing (and dropping) whatever was there before.
    pub fn init(self, value: T) {
        let Allocation { f: slot } = self;
        *slot = value;
    }
}

/// Overwrites the slot behind `a` with an `L` whose array is spelled out
/// element by element (ten explicit `1.` literals).
///
/// This is the variant that stores directly into the destination in the
/// assembly listing that follows.
// `inline(never)` stops this function from being inlined into callers;
// presumably so that it shows up as its own symbol in the listing.
#[inline(never)]
pub fn foo(a: Allocation<L>) {
    a.init(L {
        a: [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
    });
}

/// Overwrites the slot behind `a` with an `L` whose array is built with the
/// repeat expression `[1.; 10]`.
///
/// The stored value is the same as in `foo`; only the spelling of the array
/// literal differs. This is the variant that goes through a stack temporary
/// in the assembly listing that follows.
// `inline(never)` stops this function from being inlined into callers;
// presumably so that it shows up as its own symbol in the listing.
#[inline(never)]
pub fn bar(a: Allocation<L>) {
    a.init(L { a: [1.; 10] });
}

gives

.LCPI0_0:
  .quad 4607182418800017408
  .quad 4607182418800017408
example::foo:
  movaps xmm0, xmmword ptr [rip + .LCPI0_0]
  movups xmmword ptr [rdi], xmm0
  movups xmmword ptr [rdi + 16], xmm0
  movups xmmword ptr [rdi + 32], xmm0
  movups xmmword ptr [rdi + 48], xmm0
  movups xmmword ptr [rdi + 64], xmm0
  ret

.LCPI1_0:
  .quad 4607182418800017408
  .quad 4607182418800017408
example::bar:
  sub rsp, 88
  movaps xmm0, xmmword ptr [rip + .LCPI1_0]
  movaps xmmword ptr [rsp], xmm0
  movaps xmmword ptr [rsp + 16], xmm0
  movaps xmmword ptr [rsp + 32], xmm0
  movaps xmmword ptr [rsp + 48], xmm0
  movaps xmmword ptr [rsp + 64], xmm0
  movaps xmm0, xmmword ptr [rsp + 64]
  movups xmmword ptr [rdi + 64], xmm0
  movaps xmm0, xmmword ptr [rsp + 48]
  movups xmmword ptr [rdi + 48], xmm0
  movaps xmm0, xmmword ptr [rsp + 32]
  movups xmmword ptr [rdi + 32], xmm0
  movaps xmm0, xmmword ptr [rsp + 16]
  movups xmmword ptr [rdi + 16], xmm0
  movaps xmm0, xmmword ptr [rsp]
  movups xmmword ptr [rdi], xmm0
  add rsp, 88
  ret

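which has an additional copy of the array: `bar` first materializes the array in a stack temporary (`[rsp]` .. `[rsp + 64]`) and then copies it to the destination, whereas `foo` stores straight through `rdi`.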

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
    A-codegen — Area: Code generation
    I-slow — Issue: Problems and improvements with respect to performance of generated code.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions