Skip to content

llvm generates SVE code with only +sme and no streaming mode enabled #86743

Closed
@aemerson

Description

@aemerson
; Reproducer module for the issue: targets arm64 Darwin.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-darwin"

; div_crash(%0, %1):
;   %0 - vector of 16 x i32 divisors
;   %1 - destination pointer for 16 result bytes
; Computes the lane-wise signed quotient 1 / %0, truncates every lane to i8,
; stores the 16 bytes through %1, and always returns 0.
define i32 @div_crash(<16 x i32> %0, ptr %1) #0 {
  ; Lane-wise signed division of a splat of 1 by the input vector.
  %3 = sdiv <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %0
  ; Narrow each 32-bit quotient to 8 bits.
  %4 = trunc <16 x i32> %3 to <16 x i8>
  ; Store with byte alignment only (align 1).
  store <16 x i8> %4, ptr %1, align 1
  ret i32 0
}

; The function carries only a target-cpu attribute; nothing here requests
; streaming mode.
attributes #0 = { "target-cpu"="generic" }

Run:

llc -mattr=+sme,+neon

on that and you'll get the following code:

	.section	__TEXT,__text,regular,pure_instructions
	.globl	_div_crash                      ; -- Begin function div_crash
	.p2align	2
; Compiler output for div_crash as quoted in the report.
; In:  q0-q3 = the <16 x i32> argument (four lanes per register), x0 = destination pointer
; Out: w0 = 0; 16 result bytes written to [x0]
; NOTE: ptrue and sdivr below are SVE instructions; they are the subject of the report.
_div_crash:                             ; @div_crash
	.cfi_startproc
; %bb.0:
	; p0 = predicate with the first four 32-bit lanes active (one 128-bit vector's worth).
	ptrue	p0.s, vl4
	; v4 = splat of 1, the dividend for every lane.
	movi.4s	v4, #1
                                        ; kill: def $q3 killed $q3 def $z3
                                        ; kill: def $q2 killed $q2 def $z2
                                        ; kill: def $q1 killed $q1 def $z1
                                        ; kill: def $q0 killed $q0 def $z0
	; Keep the destination pointer in x8 so w0 can be set to the return value 0.
	mov	x8, x0
	mov	w0, wzr
	; Reversed signed divide: each zN = z4 / zN for the active lanes, i.e. 1 / input.
	sdivr	z1.s, p0/m, z1.s, z4.s
	sdivr	z0.s, p0/m, z0.s, z4.s
	sdivr	z3.s, p0/m, z3.s, z4.s
	; uzp1 keeps the even-indexed elements of the register pair, narrowing 32-bit lanes to 16-bit.
	uzp1.8h	v0, v0, v1
	sdivr	z2.s, p0/m, z2.s, z4.s
	uzp1.8h	v2, v2, v3
	; Second narrowing step: 16-bit lanes to 8-bit, giving 16 result bytes in v0.
	uzp1.16b	v0, v0, v2
	; Write the 16 result bytes to the destination.
	str	q0, [x8]
	ret
	.cfi_endproc

Without +sve, there is no guarantee that any of these SVE instructions are legal to execute, because no attribute enables streaming mode.

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions