Skip to content

Commit aacebae

Browse files
authored
[AMDGPU] Do not promote uniform i16 operations to i32 in CGP (#140208)
For the majority of cases, this is a neutral or positive change. There are even test cases that benefit greatly from it, but some regressions are possible. There is #140040 for GlobalISel that would need to be fixed, but it's only a one-instruction regression and I think it can be fixed later. Solves #64591.
1 parent 5e7bc5e commit aacebae

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+17565
-19032
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ static cl::opt<bool> WidenLoads(
4747
cl::init(false));
4848

4949
static cl::opt<bool> Widen16BitOps(
50-
"amdgpu-codegenprepare-widen-16-bit-ops",
51-
cl::desc("Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
52-
cl::ReallyHidden,
53-
cl::init(true));
50+
"amdgpu-codegenprepare-widen-16-bit-ops",
51+
cl::desc(
52+
"Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
53+
cl::ReallyHidden, cl::init(false));
5454

5555
static cl::opt<bool>
5656
BreakLargePHIs("amdgpu-codegenprepare-break-large-phis",

llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -281,12 +281,12 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
281281
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
282282
; GFX8: ; %bb.0:
283283
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
284-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
285-
; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
286284
; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
285+
; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
286+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
287+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
287288
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
288-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
289-
; GFX8-NEXT: s_or_b32 s0, s1, s0
289+
; GFX8-NEXT: s_or_b32 s0, s0, s1
290290
; GFX8-NEXT: ; return to shader part epilog
291291
;
292292
; GFX10-LABEL: s_add_v2i16_neg_inline_imm_splat:
@@ -323,12 +323,12 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
323323
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
324324
; GFX8: ; %bb.0:
325325
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
326-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
327-
; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
328326
; GFX8-NEXT: s_add_i32 s1, s1, 4
327+
; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
328+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
329+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
329330
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
330-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
331-
; GFX8-NEXT: s_or_b32 s0, s1, s0
331+
; GFX8-NEXT: s_or_b32 s0, s0, s1
332332
; GFX8-NEXT: ; return to shader part epilog
333333
;
334334
; GFX10-LABEL: s_add_v2i16_neg_inline_imm_lo:
@@ -365,12 +365,12 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
365365
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
366366
; GFX8: ; %bb.0:
367367
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
368-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
369-
; GFX8-NEXT: s_add_i32 s0, s0, 4
370368
; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
369+
; GFX8-NEXT: s_add_i32 s0, s0, 4
370+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
371+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
371372
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
372-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
373-
; GFX8-NEXT: s_or_b32 s0, s1, s0
373+
; GFX8-NEXT: s_or_b32 s0, s0, s1
374374
; GFX8-NEXT: ; return to shader part epilog
375375
;
376376
; GFX10-LABEL: s_add_v2i16_neg_inline_imm_hi:
@@ -408,14 +408,13 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
408408
; GFX8-LABEL: s_add_v2i16:
409409
; GFX8: ; %bb.0:
410410
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
411-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
412411
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
413-
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
414-
; GFX8-NEXT: s_add_i32 s0, s0, s1
415412
; GFX8-NEXT: s_add_i32 s2, s2, s3
416-
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
417-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
418-
; GFX8-NEXT: s_or_b32 s0, s1, s0
413+
; GFX8-NEXT: s_add_i32 s0, s0, s1
414+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
415+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
416+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
417+
; GFX8-NEXT: s_or_b32 s0, s0, s1
419418
; GFX8-NEXT: ; return to shader part epilog
420419
;
421420
; GFX10-LABEL: s_add_v2i16:
@@ -461,14 +460,13 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
461460
; GFX8: ; %bb.0:
462461
; GFX8-NEXT: s_xor_b32 s0, s0, 0x80008000
463462
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
464-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
465463
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
466-
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
467-
; GFX8-NEXT: s_add_i32 s0, s0, s1
468464
; GFX8-NEXT: s_add_i32 s2, s2, s3
469-
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
470-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
471-
; GFX8-NEXT: s_or_b32 s0, s1, s0
465+
; GFX8-NEXT: s_add_i32 s0, s0, s1
466+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
467+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
468+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
469+
; GFX8-NEXT: s_or_b32 s0, s0, s1
472470
; GFX8-NEXT: ; return to shader part epilog
473471
;
474472
; GFX10-LABEL: s_add_v2i16_fneg_lhs:
@@ -517,14 +515,13 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
517515
; GFX8: ; %bb.0:
518516
; GFX8-NEXT: s_xor_b32 s1, s1, 0x80008000
519517
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
520-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
521518
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
522-
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
523-
; GFX8-NEXT: s_add_i32 s0, s0, s1
524519
; GFX8-NEXT: s_add_i32 s2, s2, s3
525-
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
526-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
527-
; GFX8-NEXT: s_or_b32 s0, s1, s0
520+
; GFX8-NEXT: s_add_i32 s0, s0, s1
521+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
522+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
523+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
524+
; GFX8-NEXT: s_or_b32 s0, s0, s1
528525
; GFX8-NEXT: ; return to shader part epilog
529526
;
530527
; GFX10-LABEL: s_add_v2i16_fneg_rhs:
@@ -580,14 +577,13 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
580577
; GFX8-NEXT: s_xor_b32 s0, s0, 0x80008000
581578
; GFX8-NEXT: s_xor_b32 s1, s1, 0x80008000
582579
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
583-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
584580
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
585-
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
586-
; GFX8-NEXT: s_add_i32 s0, s0, s1
587581
; GFX8-NEXT: s_add_i32 s2, s2, s3
588-
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
589-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
590-
; GFX8-NEXT: s_or_b32 s0, s1, s0
582+
; GFX8-NEXT: s_add_i32 s0, s0, s1
583+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
584+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
585+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
586+
; GFX8-NEXT: s_or_b32 s0, s0, s1
591587
; GFX8-NEXT: ; return to shader part epilog
592588
;
593589
; GFX10-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 68 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -70,30 +70,15 @@ define i8 @v_ashr_i8_7(i8 %value) {
7070
}
7171

7272
define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) {
73-
; GFX6-LABEL: s_ashr_i8:
74-
; GFX6: ; %bb.0:
75-
; GFX6-NEXT: s_sext_i32_i8 s0, s0
76-
; GFX6-NEXT: s_ashr_i32 s0, s0, s1
77-
; GFX6-NEXT: ; return to shader part epilog
78-
;
79-
; GFX8-LABEL: s_ashr_i8:
80-
; GFX8: ; %bb.0:
81-
; GFX8-NEXT: s_sext_i32_i8 s0, s0
82-
; GFX8-NEXT: s_sext_i32_i8 s1, s1
83-
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
84-
; GFX8-NEXT: ; return to shader part epilog
85-
;
86-
; GFX9-LABEL: s_ashr_i8:
87-
; GFX9: ; %bb.0:
88-
; GFX9-NEXT: s_sext_i32_i8 s0, s0
89-
; GFX9-NEXT: s_sext_i32_i8 s1, s1
90-
; GFX9-NEXT: s_ashr_i32 s0, s0, s1
91-
; GFX9-NEXT: ; return to shader part epilog
73+
; GCN-LABEL: s_ashr_i8:
74+
; GCN: ; %bb.0:
75+
; GCN-NEXT: s_sext_i32_i8 s0, s0
76+
; GCN-NEXT: s_ashr_i32 s0, s0, s1
77+
; GCN-NEXT: ; return to shader part epilog
9278
;
9379
; GFX10PLUS-LABEL: s_ashr_i8:
9480
; GFX10PLUS: ; %bb.0:
9581
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
96-
; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1
9782
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
9883
; GFX10PLUS-NEXT: ; return to shader part epilog
9984
%result = ashr i8 %value, %amount
@@ -642,30 +627,15 @@ define i16 @v_ashr_i16_15(i16 %value) {
642627
}
643628

644629
define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) {
645-
; GFX6-LABEL: s_ashr_i16:
646-
; GFX6: ; %bb.0:
647-
; GFX6-NEXT: s_sext_i32_i16 s0, s0
648-
; GFX6-NEXT: s_ashr_i32 s0, s0, s1
649-
; GFX6-NEXT: ; return to shader part epilog
650-
;
651-
; GFX8-LABEL: s_ashr_i16:
652-
; GFX8: ; %bb.0:
653-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
654-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
655-
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
656-
; GFX8-NEXT: ; return to shader part epilog
657-
;
658-
; GFX9-LABEL: s_ashr_i16:
659-
; GFX9: ; %bb.0:
660-
; GFX9-NEXT: s_sext_i32_i16 s0, s0
661-
; GFX9-NEXT: s_sext_i32_i16 s1, s1
662-
; GFX9-NEXT: s_ashr_i32 s0, s0, s1
663-
; GFX9-NEXT: ; return to shader part epilog
630+
; GCN-LABEL: s_ashr_i16:
631+
; GCN: ; %bb.0:
632+
; GCN-NEXT: s_sext_i32_i16 s0, s0
633+
; GCN-NEXT: s_ashr_i32 s0, s0, s1
634+
; GCN-NEXT: ; return to shader part epilog
664635
;
665636
; GFX10PLUS-LABEL: s_ashr_i16:
666637
; GFX10PLUS: ; %bb.0:
667638
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
668-
; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1
669639
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
670640
; GFX10PLUS-NEXT: ; return to shader part epilog
671641
%result = ashr i16 %value, %amount
@@ -826,14 +796,15 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
826796
;
827797
; GFX8-LABEL: s_ashr_v2i16:
828798
; GFX8: ; %bb.0:
829-
; GFX8-NEXT: s_sext_i32_i16 s2, s0
830-
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
831-
; GFX8-NEXT: s_sext_i32_i16 s3, s1
832-
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
833-
; GFX8-NEXT: s_ashr_i32 s2, s2, s3
799+
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
800+
; GFX8-NEXT: s_sext_i32_i16 s0, s0
801+
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
834802
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
835-
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
836-
; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
803+
; GFX8-NEXT: s_sext_i32_i16 s1, s2
804+
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
805+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
806+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
807+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
837808
; GFX8-NEXT: s_or_b32 s0, s0, s1
838809
; GFX8-NEXT: ; return to shader part epilog
839810
;
@@ -1028,23 +999,25 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
1028999
;
10291000
; GFX8-LABEL: s_ashr_v4i16:
10301001
; GFX8: ; %bb.0:
1031-
; GFX8-NEXT: s_sext_i32_i16 s4, s0
1032-
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1033-
; GFX8-NEXT: s_sext_i32_i16 s5, s1
1034-
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1035-
; GFX8-NEXT: s_sext_i32_i16 s6, s2
1036-
; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1037-
; GFX8-NEXT: s_sext_i32_i16 s7, s3
1038-
; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1039-
; GFX8-NEXT: s_ashr_i32 s4, s4, s6
1002+
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1003+
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1004+
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
10401005
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1041-
; GFX8-NEXT: s_ashr_i32 s2, s5, s7
1006+
; GFX8-NEXT: s_sext_i32_i16 s2, s4
1007+
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1008+
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1009+
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1010+
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
10421011
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1043-
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1044-
; GFX8-NEXT: s_and_b32 s3, s4, 0xffff
1045-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1046-
; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1047-
; GFX8-NEXT: s_or_b32 s0, s0, s3
1012+
; GFX8-NEXT: s_sext_i32_i16 s3, s5
1013+
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1014+
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1015+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1016+
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1017+
; GFX8-NEXT: s_or_b32 s0, s0, s2
1018+
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1019+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1020+
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
10481021
; GFX8-NEXT: s_or_b32 s1, s1, s2
10491022
; GFX8-NEXT: ; return to shader part epilog
10501023
;
@@ -1235,41 +1208,45 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
12351208
;
12361209
; GFX8-LABEL: s_ashr_v8i16:
12371210
; GFX8: ; %bb.0:
1238-
; GFX8-NEXT: s_sext_i32_i16 s8, s0
1239-
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1240-
; GFX8-NEXT: s_sext_i32_i16 s9, s1
1241-
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1242-
; GFX8-NEXT: s_sext_i32_i16 s12, s4
1243-
; GFX8-NEXT: s_bfe_i32 s4, s4, 0x100010
1244-
; GFX8-NEXT: s_sext_i32_i16 s13, s5
1245-
; GFX8-NEXT: s_bfe_i32 s5, s5, 0x100010
1246-
; GFX8-NEXT: s_sext_i32_i16 s10, s2
1247-
; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1248-
; GFX8-NEXT: s_sext_i32_i16 s14, s6
1249-
; GFX8-NEXT: s_bfe_i32 s6, s6, 0x100010
1211+
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1212+
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1213+
; GFX8-NEXT: s_lshr_b32 s12, s4, 16
12501214
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1251-
; GFX8-NEXT: s_ashr_i32 s4, s9, s13
1215+
; GFX8-NEXT: s_sext_i32_i16 s4, s8
1216+
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1217+
; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1218+
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1219+
; GFX8-NEXT: s_lshr_b32 s13, s5, 16
12521220
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1253-
; GFX8-NEXT: s_sext_i32_i16 s11, s3
1254-
; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1255-
; GFX8-NEXT: s_sext_i32_i16 s15, s7
1256-
; GFX8-NEXT: s_bfe_i32 s7, s7, 0x100010
1257-
; GFX8-NEXT: s_ashr_i32 s5, s10, s14
1221+
; GFX8-NEXT: s_sext_i32_i16 s5, s9
1222+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1223+
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1224+
; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1225+
; GFX8-NEXT: s_sext_i32_i16 s2, s2
1226+
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1227+
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1228+
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
12581229
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1259-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1260-
; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
1261-
; GFX8-NEXT: s_ashr_i32 s8, s8, s12
1262-
; GFX8-NEXT: s_ashr_i32 s6, s11, s15
1230+
; GFX8-NEXT: s_sext_i32_i16 s6, s10
1231+
; GFX8-NEXT: s_or_b32 s0, s0, s4
1232+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1233+
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1234+
; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1235+
; GFX8-NEXT: s_sext_i32_i16 s3, s3
1236+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1237+
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1238+
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
12631239
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1240+
; GFX8-NEXT: s_sext_i32_i16 s7, s11
12641241
; GFX8-NEXT: s_or_b32 s1, s1, s4
1265-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1266-
; GFX8-NEXT: s_and_b32 s4, s5, 0xffff
1267-
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1268-
; GFX8-NEXT: s_and_b32 s7, s8, 0xffff
1242+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1243+
; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1244+
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1245+
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
12691246
; GFX8-NEXT: s_or_b32 s2, s2, s4
1270-
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1271-
; GFX8-NEXT: s_and_b32 s4, s6, 0xffff
1272-
; GFX8-NEXT: s_or_b32 s0, s0, s7
1247+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1248+
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1249+
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
12731250
; GFX8-NEXT: s_or_b32 s3, s3, s4
12741251
; GFX8-NEXT: ; return to shader part epilog
12751252
;

0 commit comments

Comments
 (0)