Skip to content

Commit 01c6d24

Browse files
committed
[AArch64][GlobalISel] Fold buildvector of bitcast
This adds a combine for build vectors of bitcast values, sinking the bitcast and generating a build vector from the original scalar type:

  %5:_(<4 x s8>) = G_BITCAST %16:_(s32)
  %18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8) = G_UNMERGE_VALUES %5:_(<4 x s8>)
  %22:_(s8) = G_IMPLICIT_DEF
  %23:_(<8 x s8>) = G_BUILD_VECTOR %18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8), %22:_(s8), %22:_(s8), %22:_(s8), %22:_(s8)
=>
  <2 x s32> G_BUILD_VECTOR %16, %undef
  <8 x s8> G_BITCAST

It helps clean up some of the inefficiencies from widening scalar types.
1 parent 1cf5466 commit 01c6d24

File tree

18 files changed

+141
-287
lines changed

18 files changed

+141
-287
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,13 @@ class CombinerHelper {
265265
bool matchCombineShuffleToBuildVector(MachineInstr &MI) const;
266266
void applyCombineShuffleToBuildVector(MachineInstr &MI) const;
267267

268+
/// Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) to
269+
/// G_BITCAST(G_BUILD_VECTOR(..))
270+
bool matchCombineBuildVectorOfBitcast(MachineInstr &MI,
271+
SmallVector<Register> &Ops) const;
272+
void applyCombineBuildVectorOfBitcast(MachineInstr &MI,
273+
SmallVector<Register> &Ops) const;
274+
268275
/// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
269276
/// Returns true if MI changed.
270277
///

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,6 +1579,13 @@ def combine_shuffle_vector_to_build_vector : GICombineRule<
15791579
[{ return Helper.matchCombineShuffleToBuildVector(*${root}); }]),
15801580
(apply [{ Helper.applyCombineShuffleToBuildVector(*${root}); }])>;
15811581

1582+
// Fold G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) into
// G_BITCAST(G_BUILD_VECTOR(..)). The rule is rooted at the G_BUILD_VECTOR;
// both the match and the rewrite are delegated to the Helper callbacks named
// below, which exchange their state through $matchinfo.
1583+
def combine_build_vector_of_bitcast : GICombineRule<
1584+
(defs root:$root, concat_matchinfo:$matchinfo),
1585+
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
1586+
[{ return Helper.matchCombineBuildVectorOfBitcast(*${root}, ${matchinfo}); }]),
1587+
(apply [{ Helper.applyCombineBuildVectorOfBitcast(*${root}, ${matchinfo}); }])>;
1588+
15821589
def insert_vector_element_idx_undef : GICombineRule<
15831590
(defs root:$root),
15841591
(match (G_IMPLICIT_DEF $idx),

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,75 @@ void CombinerHelper::applyCombineConcatVectors(
386386
MI.eraseFromParent();
387387
}
388388

389+
/// Match a G_BUILD_VECTOR whose elements come, in groups of Factor operands,
/// from G_UNMERGE_VALUES of a G_BITCAST of one common scalar type, with
/// G_IMPLICIT_DEF allowed in place of any element.
///
/// On success \p Ops holds one entry per group: the scalar register feeding
/// that group's G_BITCAST, or register 0 when the group's first element is not
/// produced by an unmerge (every element of such a group is G_IMPLICIT_DEF).
///
/// \returns true if \p MI can be rewritten as G_BITCAST(G_BUILD_VECTOR(Ops)).
bool CombinerHelper::matchCombineBuildVectorOfBitcast(
    MachineInstr &MI, SmallVector<Register> &Ops) const {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
         "Invalid instruction");

  // Operand 0 is the result; the remaining operands are the vector elements.
  const unsigned NumSrcs = MI.getNumOperands() - 1;

  // Look at the first operand for an unmerge(bitcast) from a scalar type.
  GUnmerge *FirstUnmerge =
      dyn_cast<GUnmerge>(MRI.getVRegDef(MI.getOperand(1).getReg()));
  if (!FirstUnmerge || FirstUnmerge->getReg(0) != MI.getOperand(1).getReg())
    return false;
  MachineInstr *FirstBC = MRI.getVRegDef(FirstUnmerge->getSourceReg());
  if (FirstBC->getOpcode() != TargetOpcode::G_BITCAST)
    return false;
  LLT InputTy = MRI.getType(FirstBC->getOperand(1).getReg());
  unsigned Factor = FirstUnmerge->getNumDefs();
  if (!InputTy.isScalar() || NumSrcs % Factor != 0)
    return false;

  // A single group would need a one-element build_vector, which is not a
  // valid vector type, so there is nothing to fold into.
  const unsigned NumGroups = NumSrcs / Factor;
  if (NumGroups < 2)
    return false;

  // Check if the build_vector is legal.
  LLT BVDstTy = LLT::fixed_vector(NumGroups, InputTy);
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_BUILD_VECTOR, {BVDstTy, InputTy}}))
    return false;

  // The apply step derives the new vector's type from Ops[0] and Ops.size(),
  // so make sure no entries from an earlier use of the match data survive.
  Ops.clear();

  // Check all other operands are bitcasts or undef.
  for (unsigned Idx = 0; Idx < NumSrcs; Idx += Factor) {
    GUnmerge *GroupUnmerge =
        dyn_cast<GUnmerge>(MRI.getVRegDef(MI.getOperand(Idx + 1).getReg()));
    // getReg(J) is only a result for J < getNumDefs(); an unmerge with a
    // different def count cannot supply this group and must not be indexed
    // with J up to Factor - 1.
    if (GroupUnmerge && GroupUnmerge->getNumDefs() != Factor)
      return false;
    if (!all_of(iota_range<unsigned>(0, Factor, false), [&](unsigned J) {
          Register Elt = MI.getOperand(Idx + J + 1).getReg();
          if (MRI.getVRegDef(Elt)->getOpcode() ==
              TargetOpcode::G_IMPLICIT_DEF)
            return true;
          return GroupUnmerge && Elt == GroupUnmerge->getReg(J);
        }))
      return false;
    if (!GroupUnmerge)
      // Placeholder; the apply step substitutes a G_IMPLICIT_DEF for it.
      Ops.push_back(0);
    else {
      MachineInstr *GroupBC = MRI.getVRegDef(GroupUnmerge->getSourceReg());
      if (GroupBC->getOpcode() != TargetOpcode::G_BITCAST ||
          MRI.getType(GroupBC->getOperand(1).getReg()) != InputTy)
        return false;
      Ops.push_back(GroupBC->getOperand(1).getReg());
    }
  }

  return true;
}
439+
/// Rewrite \p MI as a G_BITCAST of a G_BUILD_VECTOR formed from \p Ops.
///
/// Entries of \p Ops equal to register 0 are placeholders: they are all
/// replaced in place by a single G_IMPLICIT_DEF of the scalar type of Ops[0],
/// which is only created if at least one placeholder is present. \p MI is
/// erased once the replacement instructions have been built.
void CombinerHelper::applyCombineBuildVectorOfBitcast(
    MachineInstr &MI, SmallVector<Register> &Ops) const {
  // Every real entry shares this type, so the first one determines it.
  LLT ScalarTy = MRI.getType(Ops[0]);

  // Lazily materialise one undef value and share it between all placeholders.
  Register SharedUndef = 0;
  for (Register &Entry : Ops) {
    if (Entry)
      continue;
    if (!SharedUndef)
      SharedUndef = Builder.buildUndef(ScalarTy).getReg(0);
    Entry = SharedUndef;
  }

  // Build the vector in the original scalar type, then cast it to MI's result.
  LLT WideVecTy = LLT::fixed_vector(Ops.size(), ScalarTy);
  auto WideVec = Builder.buildBuildVector(WideVecTy, Ops);
  Builder.buildBitcast(MI.getOperand(0).getReg(), WideVec);
  MI.eraseFromParent();
}
457+
389458
bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const {
390459
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
391460
"Invalid instruction");

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ def AArch64PostLegalizerCombiner
345345
ptr_add_immed_chain, overlapping_and,
346346
split_store_zero_128, undef_combines,
347347
select_to_minmax, or_to_bsp, combine_concat_vector,
348+
combine_build_vector_of_bitcast,
348349
commute_constant_to_rhs,
349350
push_freeze_to_prevent_poison_from_propagating,
350351
combine_mul_cmlt, combine_use_vector_truncate, extmultomull]> {

llvm/test/CodeGen/AArch64/add.ll

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -149,28 +149,10 @@ define void @v4i8(ptr %p1, ptr %p2) {
149149
;
150150
; CHECK-GI-LABEL: v4i8:
151151
; CHECK-GI: // %bb.0: // %entry
152-
; CHECK-GI-NEXT: ldr w8, [x0]
153-
; CHECK-GI-NEXT: ldr w9, [x1]
154-
; CHECK-GI-NEXT: fmov s0, w8
155-
; CHECK-GI-NEXT: fmov s1, w9
156-
; CHECK-GI-NEXT: mov b2, v0.b[1]
157-
; CHECK-GI-NEXT: mov b3, v1.b[1]
158-
; CHECK-GI-NEXT: mov b4, v0.b[2]
159-
; CHECK-GI-NEXT: mov b5, v0.b[3]
160-
; CHECK-GI-NEXT: fmov w8, s2
161-
; CHECK-GI-NEXT: mov b2, v1.b[2]
162-
; CHECK-GI-NEXT: fmov w9, s3
163-
; CHECK-GI-NEXT: mov b3, v1.b[3]
164-
; CHECK-GI-NEXT: mov v0.h[1], w8
165-
; CHECK-GI-NEXT: mov v1.h[1], w9
166-
; CHECK-GI-NEXT: fmov w8, s4
167-
; CHECK-GI-NEXT: fmov w9, s2
168-
; CHECK-GI-NEXT: mov v0.h[2], w8
169-
; CHECK-GI-NEXT: mov v1.h[2], w9
170-
; CHECK-GI-NEXT: fmov w8, s5
171-
; CHECK-GI-NEXT: fmov w9, s3
172-
; CHECK-GI-NEXT: mov v0.h[3], w8
173-
; CHECK-GI-NEXT: mov v1.h[3], w9
152+
; CHECK-GI-NEXT: ldr s0, [x0]
153+
; CHECK-GI-NEXT: ldr s1, [x1]
154+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
155+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
174156
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
175157
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
176158
; CHECK-GI-NEXT: fmov w8, s0

llvm/test/CodeGen/AArch64/andorxor.ll

Lines changed: 12 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -439,28 +439,10 @@ define void @and_v4i8(ptr %p1, ptr %p2) {
439439
;
440440
; CHECK-GI-LABEL: and_v4i8:
441441
; CHECK-GI: // %bb.0: // %entry
442-
; CHECK-GI-NEXT: ldr w8, [x0]
443-
; CHECK-GI-NEXT: ldr w9, [x1]
444-
; CHECK-GI-NEXT: fmov s0, w8
445-
; CHECK-GI-NEXT: fmov s1, w9
446-
; CHECK-GI-NEXT: mov b2, v0.b[1]
447-
; CHECK-GI-NEXT: mov b3, v1.b[1]
448-
; CHECK-GI-NEXT: mov b4, v0.b[2]
449-
; CHECK-GI-NEXT: mov b5, v0.b[3]
450-
; CHECK-GI-NEXT: fmov w8, s2
451-
; CHECK-GI-NEXT: mov b2, v1.b[2]
452-
; CHECK-GI-NEXT: fmov w9, s3
453-
; CHECK-GI-NEXT: mov b3, v1.b[3]
454-
; CHECK-GI-NEXT: mov v0.h[1], w8
455-
; CHECK-GI-NEXT: mov v1.h[1], w9
456-
; CHECK-GI-NEXT: fmov w8, s4
457-
; CHECK-GI-NEXT: fmov w9, s2
458-
; CHECK-GI-NEXT: mov v0.h[2], w8
459-
; CHECK-GI-NEXT: mov v1.h[2], w9
460-
; CHECK-GI-NEXT: fmov w8, s5
461-
; CHECK-GI-NEXT: fmov w9, s3
462-
; CHECK-GI-NEXT: mov v0.h[3], w8
463-
; CHECK-GI-NEXT: mov v1.h[3], w9
442+
; CHECK-GI-NEXT: ldr s0, [x0]
443+
; CHECK-GI-NEXT: ldr s1, [x1]
444+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
445+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
464446
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
465447
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
466448
; CHECK-GI-NEXT: fmov w8, s0
@@ -488,28 +470,10 @@ define void @or_v4i8(ptr %p1, ptr %p2) {
488470
;
489471
; CHECK-GI-LABEL: or_v4i8:
490472
; CHECK-GI: // %bb.0: // %entry
491-
; CHECK-GI-NEXT: ldr w8, [x0]
492-
; CHECK-GI-NEXT: ldr w9, [x1]
493-
; CHECK-GI-NEXT: fmov s0, w8
494-
; CHECK-GI-NEXT: fmov s1, w9
495-
; CHECK-GI-NEXT: mov b2, v0.b[1]
496-
; CHECK-GI-NEXT: mov b3, v1.b[1]
497-
; CHECK-GI-NEXT: mov b4, v0.b[2]
498-
; CHECK-GI-NEXT: mov b5, v0.b[3]
499-
; CHECK-GI-NEXT: fmov w8, s2
500-
; CHECK-GI-NEXT: mov b2, v1.b[2]
501-
; CHECK-GI-NEXT: fmov w9, s3
502-
; CHECK-GI-NEXT: mov b3, v1.b[3]
503-
; CHECK-GI-NEXT: mov v0.h[1], w8
504-
; CHECK-GI-NEXT: mov v1.h[1], w9
505-
; CHECK-GI-NEXT: fmov w8, s4
506-
; CHECK-GI-NEXT: fmov w9, s2
507-
; CHECK-GI-NEXT: mov v0.h[2], w8
508-
; CHECK-GI-NEXT: mov v1.h[2], w9
509-
; CHECK-GI-NEXT: fmov w8, s5
510-
; CHECK-GI-NEXT: fmov w9, s3
511-
; CHECK-GI-NEXT: mov v0.h[3], w8
512-
; CHECK-GI-NEXT: mov v1.h[3], w9
473+
; CHECK-GI-NEXT: ldr s0, [x0]
474+
; CHECK-GI-NEXT: ldr s1, [x1]
475+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
476+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
513477
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
514478
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
515479
; CHECK-GI-NEXT: fmov w8, s0
@@ -537,28 +501,10 @@ define void @xor_v4i8(ptr %p1, ptr %p2) {
537501
;
538502
; CHECK-GI-LABEL: xor_v4i8:
539503
; CHECK-GI: // %bb.0: // %entry
540-
; CHECK-GI-NEXT: ldr w8, [x0]
541-
; CHECK-GI-NEXT: ldr w9, [x1]
542-
; CHECK-GI-NEXT: fmov s0, w8
543-
; CHECK-GI-NEXT: fmov s1, w9
544-
; CHECK-GI-NEXT: mov b2, v0.b[1]
545-
; CHECK-GI-NEXT: mov b3, v1.b[1]
546-
; CHECK-GI-NEXT: mov b4, v0.b[2]
547-
; CHECK-GI-NEXT: mov b5, v0.b[3]
548-
; CHECK-GI-NEXT: fmov w8, s2
549-
; CHECK-GI-NEXT: mov b2, v1.b[2]
550-
; CHECK-GI-NEXT: fmov w9, s3
551-
; CHECK-GI-NEXT: mov b3, v1.b[3]
552-
; CHECK-GI-NEXT: mov v0.h[1], w8
553-
; CHECK-GI-NEXT: mov v1.h[1], w9
554-
; CHECK-GI-NEXT: fmov w8, s4
555-
; CHECK-GI-NEXT: fmov w9, s2
556-
; CHECK-GI-NEXT: mov v0.h[2], w8
557-
; CHECK-GI-NEXT: mov v1.h[2], w9
558-
; CHECK-GI-NEXT: fmov w8, s5
559-
; CHECK-GI-NEXT: fmov w9, s3
560-
; CHECK-GI-NEXT: mov v0.h[3], w8
561-
; CHECK-GI-NEXT: mov v1.h[3], w9
504+
; CHECK-GI-NEXT: ldr s0, [x0]
505+
; CHECK-GI-NEXT: ldr s1, [x1]
506+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
507+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
562508
; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b
563509
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
564510
; CHECK-GI-NEXT: fmov w8, s0

llvm/test/CodeGen/AArch64/bitcast-extend.ll

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,8 @@ define <4 x i16> @z_i32_v4i16(i32 %x) {
1212
;
1313
; CHECK-GI-LABEL: z_i32_v4i16:
1414
; CHECK-GI: // %bb.0:
15-
; CHECK-GI-NEXT: fmov s0, w0
16-
; CHECK-GI-NEXT: mov b1, v0.b[1]
17-
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
18-
; CHECK-GI-NEXT: mov b3, v0.b[2]
19-
; CHECK-GI-NEXT: mov b0, v0.b[3]
20-
; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
21-
; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
22-
; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
23-
; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0
15+
; CHECK-GI-NEXT: mov v0.s[0], w0
16+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
2417
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
2518
; CHECK-GI-NEXT: ret
2619
%b = bitcast i32 %x to <4 x i8>
@@ -115,15 +108,8 @@ define <4 x i16> @s_i32_v4i16(i32 %x) {
115108
;
116109
; CHECK-GI-LABEL: s_i32_v4i16:
117110
; CHECK-GI: // %bb.0:
118-
; CHECK-GI-NEXT: fmov s0, w0
119-
; CHECK-GI-NEXT: mov b1, v0.b[1]
120-
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
121-
; CHECK-GI-NEXT: mov b3, v0.b[2]
122-
; CHECK-GI-NEXT: mov b0, v0.b[3]
123-
; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
124-
; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
125-
; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
126-
; CHECK-GI-NEXT: sshll v0.8h, v2.8b, #0
111+
; CHECK-GI-NEXT: mov v0.s[0], w0
112+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
127113
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
128114
; CHECK-GI-NEXT: ret
129115
%b = bitcast i32 %x to <4 x i8>

llvm/test/CodeGen/AArch64/bitcast.ll

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,8 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
7979
; CHECK-GI-LABEL: bitcast_i32_v4i8:
8080
; CHECK-GI: // %bb.0:
8181
; CHECK-GI-NEXT: add w8, w0, w1
82-
; CHECK-GI-NEXT: fmov s0, w8
83-
; CHECK-GI-NEXT: mov b1, v0.b[1]
84-
; CHECK-GI-NEXT: mov b2, v0.b[2]
85-
; CHECK-GI-NEXT: fmov w8, s1
86-
; CHECK-GI-NEXT: mov b1, v0.b[3]
87-
; CHECK-GI-NEXT: mov v0.h[1], w8
88-
; CHECK-GI-NEXT: fmov w8, s2
89-
; CHECK-GI-NEXT: mov v0.h[2], w8
90-
; CHECK-GI-NEXT: fmov w8, s1
91-
; CHECK-GI-NEXT: mov v0.h[3], w8
82+
; CHECK-GI-NEXT: mov v0.s[0], w8
83+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
9284
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
9385
; CHECK-GI-NEXT: ret
9486
%c = add i32 %a, %b
@@ -131,11 +123,8 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
131123
; CHECK-GI-LABEL: bitcast_i32_v2i16:
132124
; CHECK-GI: // %bb.0:
133125
; CHECK-GI-NEXT: add w8, w0, w1
134-
; CHECK-GI-NEXT: fmov s0, w8
135-
; CHECK-GI-NEXT: mov h1, v0.h[1]
136126
; CHECK-GI-NEXT: mov v0.s[0], w8
137-
; CHECK-GI-NEXT: fmov w8, s1
138-
; CHECK-GI-NEXT: mov v0.s[1], w8
127+
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
139128
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
140129
; CHECK-GI-NEXT: ret
141130
%c = add i32 %a, %b

llvm/test/CodeGen/AArch64/ctlz.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,8 @@ define void @v4i8(ptr %p1) {
8787
;
8888
; CHECK-GI-LABEL: v4i8:
8989
; CHECK-GI: // %bb.0: // %entry
90-
; CHECK-GI-NEXT: ldr w8, [x0]
91-
; CHECK-GI-NEXT: fmov s0, w8
92-
; CHECK-GI-NEXT: mov b1, v0.b[1]
93-
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
94-
; CHECK-GI-NEXT: mov b3, v0.b[2]
95-
; CHECK-GI-NEXT: mov b0, v0.b[3]
96-
; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
97-
; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
98-
; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
99-
; CHECK-GI-NEXT: clz v0.8b, v2.8b
90+
; CHECK-GI-NEXT: ldr s0, [x0]
91+
; CHECK-GI-NEXT: clz v0.8b, v0.8b
10092
; CHECK-GI-NEXT: fmov w8, s0
10193
; CHECK-GI-NEXT: str w8, [x0]
10294
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/ctpop.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,8 @@ define void @v4i8(ptr %p1) {
8585
;
8686
; CHECK-GI-LABEL: v4i8:
8787
; CHECK-GI: // %bb.0: // %entry
88-
; CHECK-GI-NEXT: ldr w8, [x0]
89-
; CHECK-GI-NEXT: fmov s0, w8
90-
; CHECK-GI-NEXT: mov b1, v0.b[1]
91-
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
92-
; CHECK-GI-NEXT: mov b3, v0.b[2]
93-
; CHECK-GI-NEXT: mov b0, v0.b[3]
94-
; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
95-
; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
96-
; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
97-
; CHECK-GI-NEXT: cnt v0.8b, v2.8b
88+
; CHECK-GI-NEXT: ldr s0, [x0]
89+
; CHECK-GI-NEXT: cnt v0.8b, v0.8b
9890
; CHECK-GI-NEXT: fmov w8, s0
9991
; CHECK-GI-NEXT: str w8, [x0]
10092
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/cttz.ll

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -114,24 +114,15 @@ define void @v4i8(ptr %p1) {
114114
;
115115
; CHECK-GI-LABEL: v4i8:
116116
; CHECK-GI: // %bb.0: // %entry
117-
; CHECK-GI-NEXT: ldr w9, [x0]
118117
; CHECK-GI-NEXT: mov w8, #255 // =0xff
119-
; CHECK-GI-NEXT: fmov s0, w9
120-
; CHECK-GI-NEXT: mov b1, v0.b[1]
121-
; CHECK-GI-NEXT: mov b2, v0.b[2]
122-
; CHECK-GI-NEXT: mov b3, v0.b[3]
123-
; CHECK-GI-NEXT: fmov w9, s1
124-
; CHECK-GI-NEXT: fmov s1, w8
125-
; CHECK-GI-NEXT: mov v0.h[1], w9
126-
; CHECK-GI-NEXT: mov v1.h[1], w8
127-
; CHECK-GI-NEXT: fmov w9, s2
128-
; CHECK-GI-NEXT: mov v0.h[2], w9
129-
; CHECK-GI-NEXT: mov v1.h[2], w8
130-
; CHECK-GI-NEXT: fmov w9, s3
131-
; CHECK-GI-NEXT: mov v0.h[3], w9
132-
; CHECK-GI-NEXT: mov v1.h[3], w8
133-
; CHECK-GI-NEXT: eor v2.8b, v0.8b, v1.8b
134-
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
118+
; CHECK-GI-NEXT: ldr s1, [x0]
119+
; CHECK-GI-NEXT: fmov s0, w8
120+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
121+
; CHECK-GI-NEXT: mov v0.h[1], w8
122+
; CHECK-GI-NEXT: mov v0.h[2], w8
123+
; CHECK-GI-NEXT: mov v0.h[3], w8
124+
; CHECK-GI-NEXT: eor v2.8b, v1.8b, v0.8b
125+
; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
135126
; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b
136127
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
137128
; CHECK-GI-NEXT: cnt v0.8b, v0.8b

0 commit comments

Comments
 (0)