@@ -70,30 +70,15 @@ define i8 @v_ashr_i8_7(i8 %value) {
70
70
}
71
71
72
72
define amdgpu_ps i8 @s_ashr_i8 (i8 inreg %value , i8 inreg %amount ) {
73
- ; GFX6-LABEL: s_ashr_i8:
74
- ; GFX6: ; %bb.0:
75
- ; GFX6-NEXT: s_sext_i32_i8 s0, s0
76
- ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
77
- ; GFX6-NEXT: ; return to shader part epilog
78
- ;
79
- ; GFX8-LABEL: s_ashr_i8:
80
- ; GFX8: ; %bb.0:
81
- ; GFX8-NEXT: s_sext_i32_i8 s0, s0
82
- ; GFX8-NEXT: s_sext_i32_i8 s1, s1
83
- ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
84
- ; GFX8-NEXT: ; return to shader part epilog
85
- ;
86
- ; GFX9-LABEL: s_ashr_i8:
87
- ; GFX9: ; %bb.0:
88
- ; GFX9-NEXT: s_sext_i32_i8 s0, s0
89
- ; GFX9-NEXT: s_sext_i32_i8 s1, s1
90
- ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
91
- ; GFX9-NEXT: ; return to shader part epilog
73
+ ; GCN-LABEL: s_ashr_i8:
74
+ ; GCN: ; %bb.0:
75
+ ; GCN-NEXT: s_sext_i32_i8 s0, s0
76
+ ; GCN-NEXT: s_ashr_i32 s0, s0, s1
77
+ ; GCN-NEXT: ; return to shader part epilog
92
78
;
93
79
; GFX10PLUS-LABEL: s_ashr_i8:
94
80
; GFX10PLUS: ; %bb.0:
95
81
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
96
- ; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1
97
82
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
98
83
; GFX10PLUS-NEXT: ; return to shader part epilog
99
84
%result = ashr i8 %value , %amount
@@ -642,30 +627,15 @@ define i16 @v_ashr_i16_15(i16 %value) {
642
627
}
643
628
644
629
define amdgpu_ps i16 @s_ashr_i16 (i16 inreg %value , i16 inreg %amount ) {
645
- ; GFX6-LABEL: s_ashr_i16:
646
- ; GFX6: ; %bb.0:
647
- ; GFX6-NEXT: s_sext_i32_i16 s0, s0
648
- ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
649
- ; GFX6-NEXT: ; return to shader part epilog
650
- ;
651
- ; GFX8-LABEL: s_ashr_i16:
652
- ; GFX8: ; %bb.0:
653
- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
654
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
655
- ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
656
- ; GFX8-NEXT: ; return to shader part epilog
657
- ;
658
- ; GFX9-LABEL: s_ashr_i16:
659
- ; GFX9: ; %bb.0:
660
- ; GFX9-NEXT: s_sext_i32_i16 s0, s0
661
- ; GFX9-NEXT: s_sext_i32_i16 s1, s1
662
- ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
663
- ; GFX9-NEXT: ; return to shader part epilog
630
+ ; GCN-LABEL: s_ashr_i16:
631
+ ; GCN: ; %bb.0:
632
+ ; GCN-NEXT: s_sext_i32_i16 s0, s0
633
+ ; GCN-NEXT: s_ashr_i32 s0, s0, s1
634
+ ; GCN-NEXT: ; return to shader part epilog
664
635
;
665
636
; GFX10PLUS-LABEL: s_ashr_i16:
666
637
; GFX10PLUS: ; %bb.0:
667
638
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
668
- ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1
669
639
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
670
640
; GFX10PLUS-NEXT: ; return to shader part epilog
671
641
%result = ashr i16 %value , %amount
@@ -826,14 +796,15 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
826
796
;
827
797
; GFX8-LABEL: s_ashr_v2i16:
828
798
; GFX8: ; %bb.0:
829
- ; GFX8-NEXT: s_sext_i32_i16 s2, s0
830
- ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
831
- ; GFX8-NEXT: s_sext_i32_i16 s3, s1
832
- ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
833
- ; GFX8-NEXT: s_ashr_i32 s2, s2, s3
799
+ ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
800
+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
801
+ ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
834
802
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
835
- ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
836
- ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
803
+ ; GFX8-NEXT: s_sext_i32_i16 s1, s2
804
+ ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
805
+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
806
+ ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
807
+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
837
808
; GFX8-NEXT: s_or_b32 s0, s0, s1
838
809
; GFX8-NEXT: ; return to shader part epilog
839
810
;
@@ -1028,23 +999,25 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
1028
999
;
1029
1000
; GFX8-LABEL: s_ashr_v4i16:
1030
1001
; GFX8: ; %bb.0:
1031
- ; GFX8-NEXT: s_sext_i32_i16 s4, s0
1032
- ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1033
- ; GFX8-NEXT: s_sext_i32_i16 s5, s1
1034
- ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1035
- ; GFX8-NEXT: s_sext_i32_i16 s6, s2
1036
- ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1037
- ; GFX8-NEXT: s_sext_i32_i16 s7, s3
1038
- ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1039
- ; GFX8-NEXT: s_ashr_i32 s4, s4, s6
1002
+ ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1003
+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1004
+ ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1040
1005
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1041
- ; GFX8-NEXT: s_ashr_i32 s2, s5, s7
1006
+ ; GFX8-NEXT: s_sext_i32_i16 s2, s4
1007
+ ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1008
+ ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1009
+ ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1010
+ ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1042
1011
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1043
- ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1044
- ; GFX8-NEXT: s_and_b32 s3, s4, 0xffff
1045
- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1046
- ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1047
- ; GFX8-NEXT: s_or_b32 s0, s0, s3
1012
+ ; GFX8-NEXT: s_sext_i32_i16 s3, s5
1013
+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1014
+ ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1015
+ ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1016
+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1017
+ ; GFX8-NEXT: s_or_b32 s0, s0, s2
1018
+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1019
+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1020
+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1048
1021
; GFX8-NEXT: s_or_b32 s1, s1, s2
1049
1022
; GFX8-NEXT: ; return to shader part epilog
1050
1023
;
@@ -1235,41 +1208,45 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
1235
1208
;
1236
1209
; GFX8-LABEL: s_ashr_v8i16:
1237
1210
; GFX8: ; %bb.0:
1238
- ; GFX8-NEXT: s_sext_i32_i16 s8, s0
1239
- ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1240
- ; GFX8-NEXT: s_sext_i32_i16 s9, s1
1241
- ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1242
- ; GFX8-NEXT: s_sext_i32_i16 s12, s4
1243
- ; GFX8-NEXT: s_bfe_i32 s4, s4, 0x100010
1244
- ; GFX8-NEXT: s_sext_i32_i16 s13, s5
1245
- ; GFX8-NEXT: s_bfe_i32 s5, s5, 0x100010
1246
- ; GFX8-NEXT: s_sext_i32_i16 s10, s2
1247
- ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1248
- ; GFX8-NEXT: s_sext_i32_i16 s14, s6
1249
- ; GFX8-NEXT: s_bfe_i32 s6, s6, 0x100010
1211
+ ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1212
+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1213
+ ; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1250
1214
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1251
- ; GFX8-NEXT: s_ashr_i32 s4, s9, s13
1215
+ ; GFX8-NEXT: s_sext_i32_i16 s4, s8
1216
+ ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1217
+ ; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1218
+ ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1219
+ ; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1252
1220
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1253
- ; GFX8-NEXT: s_sext_i32_i16 s11, s3
1254
- ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1255
- ; GFX8-NEXT: s_sext_i32_i16 s15, s7
1256
- ; GFX8-NEXT: s_bfe_i32 s7, s7, 0x100010
1257
- ; GFX8-NEXT: s_ashr_i32 s5, s10, s14
1221
+ ; GFX8-NEXT: s_sext_i32_i16 s5, s9
1222
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1223
+ ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1224
+ ; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1225
+ ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1226
+ ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1227
+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1228
+ ; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1258
1229
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1259
- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1260
- ; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
1261
- ; GFX8-NEXT: s_ashr_i32 s8, s8, s12
1262
- ; GFX8-NEXT: s_ashr_i32 s6, s11, s15
1230
+ ; GFX8-NEXT: s_sext_i32_i16 s6, s10
1231
+ ; GFX8-NEXT: s_or_b32 s0, s0, s4
1232
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1233
+ ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1234
+ ; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1235
+ ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1236
+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1237
+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1238
+ ; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1263
1239
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1240
+ ; GFX8-NEXT: s_sext_i32_i16 s7, s11
1264
1241
; GFX8-NEXT: s_or_b32 s1, s1, s4
1265
- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1266
- ; GFX8-NEXT: s_and_b32 s4, s5, 0xffff
1267
- ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1268
- ; GFX8-NEXT: s_and_b32 s7, s8, 0xffff
1242
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1243
+ ; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1244
+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1245
+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1269
1246
; GFX8-NEXT: s_or_b32 s2, s2, s4
1270
- ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1271
- ; GFX8-NEXT: s_and_b32 s4, s6, 0xffff
1272
- ; GFX8-NEXT: s_or_b32 s0, s0, s7
1247
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1248
+ ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1249
+ ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1273
1250
; GFX8-NEXT: s_or_b32 s3, s3, s4
1274
1251
; GFX8-NEXT: ; return to shader part epilog
1275
1252
;
0 commit comments