@@ -8,34 +8,35 @@ define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspac
8
8
; GFX10-LABEL: v_mul_i64_no_zext:
9
9
; GFX10: ; %bb.0:
10
10
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
11
- ; GFX10-NEXT: v_lshlrev_b32_e32 v6 , 3, v0
11
+ ; GFX10-NEXT: v_lshlrev_b32_e32 v7 , 3, v0
12
12
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
13
13
; GFX10-NEXT: s_clause 0x1
14
- ; GFX10-NEXT: global_load_dwordx2 v[0:1], v6 , s[0:1]
15
- ; GFX10-NEXT: global_load_dwordx2 v[2:3], v6 , s[2:3]
14
+ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v7 , s[0:1]
15
+ ; GFX10-NEXT: global_load_dwordx2 v[2:3], v7 , s[2:3]
16
16
; GFX10-NEXT: s_waitcnt vmcnt(0)
17
17
; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, v0, v2, 0
18
- ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3
19
- ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v2
20
- ; GFX10-NEXT: v_add3_u32 v5, v5, v0, v1
21
- ; GFX10-NEXT: global_store_dwordx2 v6 , v[4:5], s[2:3]
18
+ ; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, v0, v3, v[5:6]
19
+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v1, v2, v[5:6]
20
+ ; GFX10-NEXT: v_mov_b32_e32 v5, v0
21
+ ; GFX10-NEXT: global_store_dwordx2 v7 , v[4:5], s[2:3]
22
22
; GFX10-NEXT: s_endpgm
23
23
;
24
24
; GFX11-LABEL: v_mul_i64_no_zext:
25
25
; GFX11: ; %bb.0:
26
26
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x2c
27
- ; GFX11-NEXT: v_lshlrev_b32_e32 v6 , 3, v0
27
+ ; GFX11-NEXT: v_lshlrev_b32_e32 v9 , 3, v0
28
28
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
29
29
; GFX11-NEXT: s_clause 0x1
30
- ; GFX11-NEXT: global_load_b64 v[0:1], v6 , s[0:1]
31
- ; GFX11-NEXT: global_load_b64 v[2:3], v6 , s[2:3]
30
+ ; GFX11-NEXT: global_load_b64 v[0:1], v9 , s[0:1]
31
+ ; GFX11-NEXT: global_load_b64 v[2:3], v9 , s[2:3]
32
32
; GFX11-NEXT: s_waitcnt vmcnt(0)
33
33
; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, v0, v2, 0
34
- ; GFX11-NEXT: v_mul_lo_u32 v0, v0, v3
35
- ; GFX11-NEXT: v_mul_lo_u32 v1, v1, v2
34
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
35
+ ; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v3, v[5:6]
36
+ ; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v1, v2, v[6:7]
36
37
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
37
- ; GFX11-NEXT: v_add3_u32 v5, v5, v0, v1
38
- ; GFX11-NEXT: global_store_b64 v6 , v[4:5], s[2:3]
38
+ ; GFX11-NEXT: v_mov_b32_e32 v5, v7
39
+ ; GFX11-NEXT: global_store_b64 v9 , v[4:5], s[2:3]
39
40
; GFX11-NEXT: s_nop 0
40
41
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
41
42
; GFX11-NEXT: s_endpgm
@@ -64,8 +65,9 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
64
65
; GFX10-NEXT: global_load_dword v4, v3, s[2:3]
65
66
; GFX10-NEXT: s_waitcnt vmcnt(0)
66
67
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v0, v4, 0
67
- ; GFX10-NEXT: v_mul_lo_u32 v0, v1, v4
68
- ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v0
68
+ ; GFX10-NEXT: v_mov_b32_e32 v0, v3
69
+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v1, v4, v[0:1]
70
+ ; GFX10-NEXT: v_mov_b32_e32 v3, v0
69
71
; GFX10-NEXT: v_mov_b32_e32 v0, 0
70
72
; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[4:5]
71
73
; GFX10-NEXT: s_endpgm
@@ -79,12 +81,13 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
79
81
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
80
82
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
81
83
; GFX11-NEXT: global_load_b64 v[0:1], v1, s[6:7]
82
- ; GFX11-NEXT: global_load_b32 v4 , v2, s[0:1]
84
+ ; GFX11-NEXT: global_load_b32 v5 , v2, s[0:1]
83
85
; GFX11-NEXT: s_waitcnt vmcnt(0)
84
- ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, v4, 0
85
- ; GFX11-NEXT: v_mul_lo_u32 v0, v1, v4
86
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
87
- ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v3, v3, v0
86
+ ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, v5, 0
87
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
88
+ ; GFX11-NEXT: v_mov_b32_e32 v0, v3
89
+ ; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v1, v5, v[0:1]
90
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
88
91
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[4:5]
89
92
; GFX11-NEXT: s_nop 0
90
93
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -114,8 +117,9 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
114
117
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[2:3]
115
118
; GFX10-NEXT: s_waitcnt vmcnt(0)
116
119
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v4, v0, 0
117
- ; GFX10-NEXT: v_mul_lo_u32 v0, v4, v1
118
- ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v0
120
+ ; GFX10-NEXT: v_mov_b32_e32 v0, v3
121
+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v4, v1, v[0:1]
122
+ ; GFX10-NEXT: v_mov_b32_e32 v3, v0
119
123
; GFX10-NEXT: v_mov_b32_e32 v0, 0
120
124
; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[4:5]
121
125
; GFX10-NEXT: s_endpgm
@@ -128,13 +132,14 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
128
132
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v0
129
133
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
130
134
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
131
- ; GFX11-NEXT: global_load_b32 v4 , v1, s[6:7]
135
+ ; GFX11-NEXT: global_load_b32 v5 , v1, s[6:7]
132
136
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1]
133
137
; GFX11-NEXT: s_waitcnt vmcnt(0)
134
- ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v4, v0, 0
135
- ; GFX11-NEXT: v_mul_lo_u32 v0, v4, v1
136
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
137
- ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v3, v3, v0
138
+ ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v5, v0, 0
139
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
140
+ ; GFX11-NEXT: v_mov_b32_e32 v0, v3
141
+ ; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v5, v1, v[0:1]
142
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
138
143
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[4:5]
139
144
; GFX11-NEXT: s_nop 0
140
145
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -211,8 +216,9 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
211
216
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
212
217
; GFX10-NEXT: s_waitcnt vmcnt(0)
213
218
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v4, v0, 0
214
- ; GFX10-NEXT: v_mul_lo_u32 v0, v4, v1
215
- ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v0
219
+ ; GFX10-NEXT: v_mov_b32_e32 v0, v3
220
+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v4, v1, v[0:1]
221
+ ; GFX10-NEXT: v_mov_b32_e32 v3, v0
216
222
; GFX10-NEXT: v_mov_b32_e32 v0, 0
217
223
; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[4:5]
218
224
; GFX10-NEXT: s_endpgm
@@ -225,13 +231,14 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
225
231
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
226
232
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
227
233
; GFX11-NEXT: s_clause 0x1
228
- ; GFX11-NEXT: global_load_b32 v4 , v0, s[6:7]
234
+ ; GFX11-NEXT: global_load_b32 v5 , v0, s[6:7]
229
235
; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1]
230
236
; GFX11-NEXT: s_waitcnt vmcnt(0)
231
- ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v4, v0, 0
232
- ; GFX11-NEXT: v_mul_lo_u32 v0, v4, v1
233
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
234
- ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_add_nc_u32 v3, v3, v0
237
+ ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v5, v0, 0
238
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
239
+ ; GFX11-NEXT: v_mov_b32_e32 v0, v3
240
+ ; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v5, v1, v[0:1]
241
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
235
242
; GFX11-NEXT: global_store_b64 v0, v[2:3], s[4:5]
236
243
; GFX11-NEXT: s_nop 0
237
244
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -390,15 +397,16 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
390
397
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[6:7]
391
398
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3]
392
399
; GFX10-NEXT: s_waitcnt vmcnt(1)
393
- ; GFX10-NEXT: v_and_b32_e32 v4, 0xfff00000, v0
394
- ; GFX10-NEXT: v_and_b32_e32 v5, 0xf00f, v1
400
+ ; GFX10-NEXT: v_and_b32_e32 v6, 0xfff00000, v0
395
401
; GFX10-NEXT: s_waitcnt vmcnt(0)
396
- ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v4, v2, 0
397
- ; GFX10-NEXT: v_mul_lo_u32 v3, v4, v3
398
- ; GFX10-NEXT: v_mul_lo_u32 v2, v5, v2
399
- ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v2
400
- ; GFX10-NEXT: v_mov_b32_e32 v2, 0
401
- ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
402
+ ; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, v6, v2, 0
403
+ ; GFX10-NEXT: v_mov_b32_e32 v0, v5
404
+ ; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, v6, v3, v[0:1]
405
+ ; GFX10-NEXT: v_and_b32_e32 v0, 0xf00f, v1
406
+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v2, v[5:6]
407
+ ; GFX10-NEXT: v_mov_b32_e32 v5, v0
408
+ ; GFX10-NEXT: v_mov_b32_e32 v0, 0
409
+ ; GFX10-NEXT: global_store_dwordx2 v0, v[4:5], s[4:5]
402
410
; GFX10-NEXT: s_endpgm
403
411
;
404
412
; GFX11-LABEL: v_mul_i64_partially_masked_src0:
@@ -412,17 +420,18 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
412
420
; GFX11-NEXT: global_load_b64 v[0:1], v2, s[6:7]
413
421
; GFX11-NEXT: global_load_b64 v[2:3], v2, s[0:1]
414
422
; GFX11-NEXT: s_waitcnt vmcnt(1)
415
- ; GFX11-NEXT: v_and_b32_e32 v4, 0xfff00000, v0
416
- ; GFX11-NEXT: v_and_b32_e32 v5, 0xf00f, v1
423
+ ; GFX11-NEXT: v_and_b32_e32 v7, 0xfff00000, v0
417
424
; GFX11-NEXT: s_waitcnt vmcnt(0)
418
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
419
- ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0
420
- ; GFX11-NEXT: v_mul_lo_u32 v3, v4, v3
421
- ; GFX11-NEXT: v_mul_lo_u32 v2, v5, v2
425
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
426
+ ; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, v7, v2, 0
427
+ ; GFX11-NEXT: v_mov_b32_e32 v0, v5
428
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
429
+ ; GFX11-NEXT: v_mad_u64_u32 v[5:6], null, v7, v3, v[0:1]
430
+ ; GFX11-NEXT: v_and_b32_e32 v3, 0xf00f, v1
431
+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[5:6]
422
432
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
423
- ; GFX11-NEXT: v_add3_u32 v1, v1, v3, v2
424
- ; GFX11-NEXT: v_mov_b32_e32 v2, 0
425
- ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
433
+ ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v0, 0
434
+ ; GFX11-NEXT: global_store_b64 v0, v[4:5], s[4:5]
426
435
; GFX11-NEXT: s_nop 0
427
436
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
428
437
; GFX11-NEXT: s_endpgm
@@ -491,27 +500,31 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
491
500
; GFX10-NEXT: s_clause 0x1
492
501
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
493
502
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
494
- ; GFX10-NEXT: v_lshlrev_b32_e32 v4 , 3, v0
503
+ ; GFX10-NEXT: v_lshlrev_b32_e32 v0 , 3, v0
495
504
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
496
505
; GFX10-NEXT: s_clause 0x1
497
- ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[6:7]
498
- ; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3]
506
+ ; GFX10-NEXT: global_load_dwordx2 v[2:3], v0, s[6:7]
507
+ ; GFX10-NEXT: global_load_dwordx2 v[4:5], v0, s[2:3]
508
+ ; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
499
509
; GFX10-NEXT: s_waitcnt vmcnt(1)
500
510
; GFX10-NEXT: v_cmp_ge_u64_e32 vcc_lo, 0, v[2:3]
501
- ; GFX10-NEXT: s_waitcnt vmcnt(0)
502
- ; GFX10-NEXT: v_mul_lo_u32 v1, v2, v1
503
511
; GFX10-NEXT: s_and_saveexec_b32 s0, vcc_lo
504
512
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
513
+ ; GFX10-NEXT: s_cbranch_execz .LBB10_2
505
514
; GFX10-NEXT: ; %bb.1: ; %else
506
- ; GFX10-NEXT: v_mad_u64_u32 v[2:3], s1, v2, v0, 0
507
- ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v1
508
- ; GFX10-NEXT: v_mov_b32_e32 v0, v2
509
- ; GFX10-NEXT: v_mov_b32_e32 v1, v3
510
- ; GFX10-NEXT: ; %bb.2: ; %Flow
515
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
516
+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, v2, v4, 0
517
+ ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, v2, v5, v[1:2]
518
+ ; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
519
+ ; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
520
+ ; GFX10-NEXT: .LBB10_2: ; %Flow
511
521
; GFX10-NEXT: s_andn2_saveexec_b32 s0, s0
522
+ ; GFX10-NEXT: s_cbranch_execz .LBB10_4
512
523
; GFX10-NEXT: ; %bb.3: ; %if
524
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
525
+ ; GFX10-NEXT: v_mul_lo_u32 v1, v2, v5
513
526
; GFX10-NEXT: v_mov_b32_e32 v0, 0
514
- ; GFX10-NEXT: ; %bb.4 : ; %endif
527
+ ; GFX10-NEXT: .LBB10_4 : ; %endif
515
528
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
516
529
; GFX10-NEXT: v_mov_b32_e32 v2, 0
517
530
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
@@ -526,22 +539,29 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
526
539
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
527
540
; GFX11-NEXT: s_clause 0x1
528
541
; GFX11-NEXT: global_load_b64 v[2:3], v0, s[6:7]
529
- ; GFX11-NEXT: global_load_b64 v[0:1 ], v0, s[0:1]
542
+ ; GFX11-NEXT: global_load_b64 v[4:5 ], v0, s[0:1]
530
543
; GFX11-NEXT: s_mov_b32 s0, exec_lo
531
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
532
- ; GFX11-NEXT: v_mul_lo_u32 v1, v2, v1
544
+ ; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1
545
+ ; GFX11-NEXT: s_waitcnt vmcnt(1)
533
546
; GFX11-NEXT: v_cmpx_ge_u64_e32 0, v[2:3]
534
547
; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
548
+ ; GFX11-NEXT: s_cbranch_execz .LBB10_2
535
549
; GFX11-NEXT: ; %bb.1: ; %else
536
- ; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v2, v0, 0
550
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
551
+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v4, 0
537
552
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
538
- ; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v1
539
- ; GFX11-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
540
- ; GFX11-NEXT: ; %bb.2: ; %Flow
553
+ ; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v2, v5, v[1:2]
554
+ ; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
555
+ ; GFX11-NEXT: v_mov_b32_e32 v1, v3
556
+ ; GFX11-NEXT: ; implicit-def: $vgpr2_vgpr3
557
+ ; GFX11-NEXT: .LBB10_2: ; %Flow
541
558
; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
559
+ ; GFX11-NEXT: s_cbranch_execz .LBB10_4
542
560
; GFX11-NEXT: ; %bb.3: ; %if
561
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
562
+ ; GFX11-NEXT: v_mul_lo_u32 v1, v2, v5
543
563
; GFX11-NEXT: v_mov_b32_e32 v0, 0
544
- ; GFX11-NEXT: ; %bb.4 : ; %endif
564
+ ; GFX11-NEXT: .LBB10_4 : ; %endif
545
565
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
546
566
; GFX11-NEXT: v_mov_b32_e32 v2, 0
547
567
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
0 commit comments