@@ -811,16 +811,15 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
811
811
;
812
812
; GFX8-LABEL: s_ashr_v2i16:
813
813
; GFX8: ; %bb.0:
814
- ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
815
- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
816
- ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
817
- ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
818
- ; GFX8-NEXT: s_sext_i32_i16 s1, s2
819
- ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
820
- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
814
+ ; GFX8-NEXT: s_lshr_b32 s2, s1, 16
815
+ ; GFX8-NEXT: s_sext_i32_i16 s3, s0
816
+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
817
+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s2
818
+ ; GFX8-NEXT: s_ashr_i32 s1, s3, s1
821
819
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
822
- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
823
- ; GFX8-NEXT: s_or_b32 s0, s0, s1
820
+ ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
821
+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
822
+ ; GFX8-NEXT: s_or_b32 s0, s1, s0
824
823
; GFX8-NEXT: ; return to shader part epilog
825
824
;
826
825
; GFX9-LABEL: s_ashr_v2i16:
@@ -1014,26 +1013,24 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
1014
1013
;
1015
1014
; GFX8-LABEL: s_ashr_v4i16:
1016
1015
; GFX8: ; %bb.0:
1017
- ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1018
- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1019
- ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1020
- ; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1021
- ; GFX8-NEXT: s_sext_i32_i16 s2, s4
1022
- ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1023
- ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1024
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1025
- ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1026
- ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1027
- ; GFX8-NEXT: s_sext_i32_i16 s3, s5
1028
- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1029
- ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1016
+ ; GFX8-NEXT: s_lshr_b32 s4, s2, 16
1017
+ ; GFX8-NEXT: s_sext_i32_i16 s6, s0
1018
+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1019
+ ; GFX8-NEXT: s_lshr_b32 s5, s3, 16
1020
+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1021
+ ; GFX8-NEXT: s_sext_i32_i16 s4, s1
1022
+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1023
+ ; GFX8-NEXT: s_ashr_i32 s2, s6, s2
1024
+ ; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1030
1025
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1031
- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1032
- ; GFX8-NEXT: s_or_b32 s0, s0, s2
1033
- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1026
+ ; GFX8-NEXT: s_ashr_i32 s3, s4, s3
1027
+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1028
+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1034
1029
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1035
- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1036
- ; GFX8-NEXT: s_or_b32 s1, s1, s2
1030
+ ; GFX8-NEXT: s_or_b32 s0, s2, s0
1031
+ ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1032
+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1033
+ ; GFX8-NEXT: s_or_b32 s1, s2, s1
1037
1034
; GFX8-NEXT: ; return to shader part epilog
1038
1035
;
1039
1036
; GFX9-LABEL: s_ashr_v4i16:
@@ -1223,46 +1220,42 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
1223
1220
;
1224
1221
; GFX8-LABEL: s_ashr_v8i16:
1225
1222
; GFX8: ; %bb.0:
1226
- ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1227
- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1228
- ; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1229
- ; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1230
- ; GFX8-NEXT: s_sext_i32_i16 s4, s8
1231
- ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1232
- ; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1233
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1234
- ; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1235
- ; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1236
- ; GFX8-NEXT: s_sext_i32_i16 s5, s9
1237
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1238
- ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1239
- ; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1240
- ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1223
+ ; GFX8-NEXT: s_lshr_b32 s8, s4, 16
1224
+ ; GFX8-NEXT: s_sext_i32_i16 s12, s0
1225
+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1226
+ ; GFX8-NEXT: s_lshr_b32 s9, s5, 16
1227
+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s8
1228
+ ; GFX8-NEXT: s_sext_i32_i16 s8, s1
1229
+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1230
+ ; GFX8-NEXT: s_lshr_b32 s10, s6, 16
1231
+ ; GFX8-NEXT: s_ashr_i32 s4, s12, s4
1232
+ ; GFX8-NEXT: s_ashr_i32 s5, s8, s5
1233
+ ; GFX8-NEXT: s_ashr_i32 s1, s1, s9
1234
+ ; GFX8-NEXT: s_sext_i32_i16 s8, s2
1235
+ ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1241
1236
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1242
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1243
- ; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1244
- ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1245
- ; GFX8-NEXT: s_sext_i32_i16 s6, s10
1246
- ; GFX8-NEXT: s_or_b32 s0, s0, s4
1247
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248
- ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1249
- ; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1250
- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1237
+ ; GFX8-NEXT: s_lshr_b32 s11, s7, 16
1238
+ ; GFX8-NEXT: s_ashr_i32 s6, s8, s6
1239
+ ; GFX8-NEXT: s_ashr_i32 s2, s2, s10
1240
+ ; GFX8-NEXT: s_sext_i32_i16 s8, s3
1241
+ ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1242
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1243
+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1251
1244
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1252
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1253
- ; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1254
- ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1255
- ; GFX8-NEXT: s_sext_i32_i16 s7, s11
1256
- ; GFX8-NEXT: s_or_b32 s1, s1, s4
1257
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1258
- ; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1245
+ ; GFX8-NEXT: s_ashr_i32 s3, s3, s11
1246
+ ; GFX8-NEXT: s_or_b32 s0, s4, s0
1247
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248
+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1259
1249
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1260
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1261
- ; GFX8-NEXT: s_or_b32 s2, s2, s4
1262
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1250
+ ; GFX8-NEXT: s_ashr_i32 s7, s8, s7
1251
+ ; GFX8-NEXT: s_or_b32 s1, s4, s1
1252
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1253
+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1263
1254
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1264
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1265
- ; GFX8-NEXT: s_or_b32 s3, s3, s4
1255
+ ; GFX8-NEXT: s_or_b32 s2, s4, s2
1256
+ ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1257
+ ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1258
+ ; GFX8-NEXT: s_or_b32 s3, s4, s3
1266
1259
; GFX8-NEXT: ; return to shader part epilog
1267
1260
;
1268
1261
; GFX9-LABEL: s_ashr_v8i16:
0 commit comments