Skip to content

Commit 1c9a93a

Browse files
Yashwant SinghYashwant Singh
authored and committed
[GlobalIsel][AMDGPU] Changing legalize rule for G_{UADDO|UADDE|USUBO|USUBE|SADDE|SSUBE}
Generic add and sub with carry are now legalized so that the carry/borrow output is computed explicitly. For example, `%6:_(s64), %7:_(s1) = G_UADDO %0, %1` was previously legalized to: `%13:_(s32), %14:_(s1) = G_UADDO %2, %4`; `%15:_(s32), %16:_(s1) = G_UADDE %3, %5, %14`; `%6:_(s64) = G_MERGE_VALUES %13(s32), %15(s32)`; `%7:_(s1) = G_ICMP intpred(ult), %6(s64), %1`. Here the G_MERGE and G_ICMP instructions are redundant for recalculating the carry output, since the carry is already produced by the final G_UADDE. (The same applies to sub with borrow.) This change fixes this. Reviewed By: arsenm, #amdgpu Differential Revision: https://reviews.llvm.org/D137932
1 parent 99833cd commit 1c9a93a

File tree

13 files changed

+814
-2053
lines changed

13 files changed

+814
-2053
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -656,12 +656,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
656656
.widenScalarToNextPow2(0)
657657
.scalarize(0);
658658

659-
getActionDefinitionsBuilder({G_UADDO, G_USUBO,
660-
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
661-
.legalFor({{S32, S1}, {S32, S32}})
662-
.minScalar(0, S32)
663-
.scalarize(0)
664-
.lower();
659+
getActionDefinitionsBuilder(
660+
{G_UADDO, G_USUBO, G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
661+
.legalFor({{S32, S1}, {S32, S32}})
662+
.clampScalar(0, S32, S32)
663+
.scalarize(0);
665664

666665
getActionDefinitionsBuilder(G_BITCAST)
667666
// Don't worry about the size constraint.

llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ define i64 @v_uaddo_i64(i64 %a, i64 %b) {
4141
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4242
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
4343
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
44-
; GFX7-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
4544
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
4645
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
4746
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -52,7 +51,6 @@ define i64 @v_uaddo_i64(i64 %a, i64 %b) {
5251
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5352
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
5453
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
55-
; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
5654
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
5755
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
5856
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -63,7 +61,6 @@ define i64 @v_uaddo_i64(i64 %a, i64 %b) {
6361
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6462
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
6563
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
66-
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
6764
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
6865
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
6966
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -480,46 +477,28 @@ define amdgpu_ps i64 @s_uaddo_i64(i64 inreg %a, i64 inreg %b) {
480477
; GFX7-LABEL: s_uaddo_i64:
481478
; GFX7: ; %bb.0:
482479
; GFX7-NEXT: s_add_u32 s0, s0, s2
483-
; GFX7-NEXT: v_mov_b32_e32 v0, s2
484480
; GFX7-NEXT: s_addc_u32 s1, s1, s3
485-
; GFX7-NEXT: v_mov_b32_e32 v1, s3
486-
; GFX7-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
487-
; GFX7-NEXT: v_mov_b32_e32 v1, s1
488-
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
489-
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
490-
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
491-
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
492-
; GFX7-NEXT: v_readfirstlane_b32 s1, v1
481+
; GFX7-NEXT: s_cselect_b32 s2, 1, 0
482+
; GFX7-NEXT: s_add_u32 s0, s0, s2
483+
; GFX7-NEXT: s_addc_u32 s1, s1, 0
493484
; GFX7-NEXT: ; return to shader part epilog
494485
;
495486
; GFX8-LABEL: s_uaddo_i64:
496487
; GFX8: ; %bb.0:
497488
; GFX8-NEXT: s_add_u32 s0, s0, s2
498-
; GFX8-NEXT: v_mov_b32_e32 v0, s2
499489
; GFX8-NEXT: s_addc_u32 s1, s1, s3
500-
; GFX8-NEXT: v_mov_b32_e32 v1, s3
501-
; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
502-
; GFX8-NEXT: v_mov_b32_e32 v1, s1
503-
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
504-
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
505-
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
506-
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
507-
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
490+
; GFX8-NEXT: s_cselect_b32 s2, 1, 0
491+
; GFX8-NEXT: s_add_u32 s0, s0, s2
492+
; GFX8-NEXT: s_addc_u32 s1, s1, 0
508493
; GFX8-NEXT: ; return to shader part epilog
509494
;
510495
; GFX9-LABEL: s_uaddo_i64:
511496
; GFX9: ; %bb.0:
512497
; GFX9-NEXT: s_add_u32 s0, s0, s2
513-
; GFX9-NEXT: v_mov_b32_e32 v0, s2
514498
; GFX9-NEXT: s_addc_u32 s1, s1, s3
515-
; GFX9-NEXT: v_mov_b32_e32 v1, s3
516-
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
517-
; GFX9-NEXT: v_mov_b32_e32 v1, s1
518-
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
519-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
520-
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
521-
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
522-
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
499+
; GFX9-NEXT: s_cselect_b32 s2, 1, 0
500+
; GFX9-NEXT: s_add_u32 s0, s0, s2
501+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
523502
; GFX9-NEXT: ; return to shader part epilog
524503
%uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
525504
%add = extractvalue {i64, i1} %uaddo, 0

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sadde.mir

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
33

44
---
55
name: test_sadde_s32
@@ -120,9 +120,13 @@ body: |
120120
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
121121
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
122122
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
123-
; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[COPY]], [[COPY1]], [[ICMP]]
123+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
124+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
125+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV]], [[UV2]], [[ICMP]]
126+
; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s32), [[SADDE1:%[0-9]+]]:_(s1) = G_SADDE [[UV1]], [[UV3]], [[UADDE1]]
127+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE]](s32), [[SADDE]](s32)
124128
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SADDE1]](s1)
125-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SADDE]](s64)
129+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
126130
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
127131
%0:_(s64) = COPY $vgpr0_vgpr1
128132
%1:_(s64) = COPY $vgpr2_vgpr3

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssube.mir

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
33

44
---
55
name: test_ssube_s32
@@ -119,9 +119,13 @@ body: |
119119
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
120120
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
121121
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
122-
; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[COPY]], [[COPY1]], [[ICMP]]
122+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
123+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
124+
; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV]], [[UV2]], [[ICMP]]
125+
; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s32), [[SSUBE1:%[0-9]+]]:_(s1) = G_SSUBE [[UV1]], [[UV3]], [[USUBE1]]
126+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE]](s32), [[SSUBE]](s32)
123127
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SSUBE1]](s1)
124-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SSUBE]](s64)
128+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
125129
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
126130
%0:_(s64) = COPY $vgpr0_vgpr1
127131
%1:_(s64) = COPY $vgpr2_vgpr3

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
33

44
---
55
name: test_uadde_s32
@@ -87,9 +87,9 @@ body: |
8787
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
8888
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
8989
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %13, [[C1]]
90-
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[AND]], [[AND1]], [[ICMP]]
91-
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDE]], [[C1]]
92-
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UADDE]](s32), [[AND2]]
90+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
91+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[AND]], [[COPY2]], [[ICMP]]
92+
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UADDE]](s32), [[AND1]]
9393
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1)
9494
; CHECK-NEXT: $vgpr0 = COPY [[UADDE]](s32)
9595
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
@@ -123,17 +123,12 @@ body: |
123123
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
124124
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
125125
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
126-
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
127-
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
128-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
129-
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
130-
; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UV4]]
131-
; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[UV5]], [[UADDO3]]
132-
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
133-
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY]]
134-
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP1]](s1)
126+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV]], [[UV2]], [[ICMP]]
127+
; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDE1]]
128+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE]](s32), [[UADDE2]](s32)
129+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDE3]](s1)
135130
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
136-
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT1]](s32)
131+
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
137132
%0:_(s64) = COPY $vgpr0_vgpr1
138133
%1:_(s64) = COPY $vgpr2_vgpr3
139134
%2:_(s32) = COPY $vgpr4

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir

Lines changed: 42 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
33

44
---
55
name: test_uaddo_s32
@@ -41,9 +41,9 @@ body: |
4141
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]]
4242
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
4343
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]]
44-
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
44+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
4545
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
46-
; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32)
46+
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
4747
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
4848
%0:_(s32) = COPY $vgpr0
4949
%1:_(s32) = COPY $vgpr1
@@ -74,9 +74,9 @@ body: |
7474
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]]
7575
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
7676
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]]
77-
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
77+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
7878
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
79-
; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32)
79+
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
8080
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
8181
%0:_(s32) = COPY $vgpr0
8282
%1:_(s32) = COPY $vgpr1
@@ -106,8 +106,7 @@ body: |
106106
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
107107
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
108108
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
109-
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]]
110-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
109+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDE1]](s1)
111110
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
112111
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
113112
%0:_(s64) = COPY $vgpr0_vgpr1
@@ -145,17 +144,17 @@ body: |
145144
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]]
146145
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]]
147146
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]]
148-
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]]
149-
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]]
150-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
151-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
147+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
148+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32)
149+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
150+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
152151
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
153152
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
154153
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
155154
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
156-
; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
157-
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
158-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
155+
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
156+
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
157+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32)
159158
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
160159
; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>)
161160
%0:_(<2 x s16>) = COPY $vgpr0
@@ -209,27 +208,27 @@ body: |
209208
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
210209
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
211210
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
212-
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]]
213-
; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]]
214-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
215-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]]
211+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
212+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32)
213+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
214+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
216215
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
217-
; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]]
218-
; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
219-
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32)
220-
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL1]]
216+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32)
217+
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
218+
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
219+
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]]
221220
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
222-
; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
223-
; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
224-
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32)
225-
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL2]]
221+
; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
222+
; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
223+
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
224+
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]]
226225
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
227226
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
228227
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
229-
; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
230-
; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
231-
; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
232-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND15]](s32), [[AND16]](s32), [[AND17]](s32)
228+
; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
229+
; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
230+
; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
231+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32)
233232
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
234233
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
235234
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -287,27 +286,27 @@ body: |
287286
; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[AND10]]
288287
; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]]
289288
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD3]](s32), [[AND11]]
290-
; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]]
291-
; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]]
292-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
293-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL]]
289+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
290+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32)
291+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
292+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
294293
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
295-
; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]]
296-
; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]]
297-
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32)
298-
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL1]]
294+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32)
295+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND11]](s32)
296+
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32)
297+
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]]
299298
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
300299
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
301300
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
302301
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
303302
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
304303
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1)
305304
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
306-
; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
307-
; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
308-
; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
309-
; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
310-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND16]](s32), [[AND17]](s32), [[AND18]](s32), [[AND19]](s32)
305+
; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
306+
; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
307+
; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
308+
; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
309+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32)
311310
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
312311
; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>)
313312
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1

0 commit comments

Comments
 (0)