|
3 | 3 | ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s |
4 | 4 | ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s |
5 | 5 |
|
6 | | -define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) #0 { |
| 6 | +define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) { |
7 | 7 | ; GFX8-LABEL: sdivrem_i32: |
8 | 8 | ; GFX8: ; %bb.0: |
9 | 9 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 |
| 10 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 11 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 12 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
10 | 13 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
11 | 14 | ; GFX8-NEXT: s_ashr_i32 s6, s5, 31 |
12 | 15 | ; GFX8-NEXT: s_add_i32 s0, s5, s6 |
@@ -142,10 +145,13 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) |
142 | 145 | ret void |
143 | 146 | } |
144 | 147 |
|
145 | | -define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) #0 { |
| 148 | +define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) { |
146 | 149 | ; GFX8-LABEL: sdivrem_i64: |
147 | 150 | ; GFX8: ; %bb.0: |
148 | 151 | ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0 |
| 152 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 153 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 154 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
149 | 155 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
150 | 156 | ; GFX8-NEXT: s_ashr_i32 s2, s9, 31 |
151 | 157 | ; GFX8-NEXT: s_ashr_i32 s12, s11, 31 |
@@ -613,10 +619,13 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) |
613 | 619 | ret void |
614 | 620 | } |
615 | 621 |
|
616 | | -define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) #0 { |
| 622 | +define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) { |
617 | 623 | ; GFX8-LABEL: sdivrem_v2i32: |
618 | 624 | ; GFX8: ; %bb.0: |
619 | 625 | ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0 |
| 626 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 627 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 628 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
620 | 629 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
621 | 630 | ; GFX8-NEXT: s_ashr_i32 s2, s10, 31 |
622 | 631 | ; GFX8-NEXT: s_add_i32 s0, s10, s2 |
@@ -842,9 +851,12 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1 |
842 | 851 | ret void |
843 | 852 | } |
844 | 853 |
|
845 | | -define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) #0 { |
| 854 | +define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) { |
846 | 855 | ; GFX8-LABEL: sdivrem_v4i32: |
847 | 856 | ; GFX8: ; %bb.0: |
| 857 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 858 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 859 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
848 | 860 | ; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x10 |
849 | 861 | ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 |
850 | 862 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
@@ -1268,9 +1280,12 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1 |
1268 | 1280 | ret void |
1269 | 1281 | } |
1270 | 1282 |
|
1271 | | -define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) #0 { |
| 1283 | +define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) { |
1272 | 1284 | ; GFX8-LABEL: sdivrem_v2i64: |
1273 | 1285 | ; GFX8: ; %bb.0: |
| 1286 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 1287 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 1288 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
1274 | 1289 | ; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0 |
1275 | 1290 | ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 |
1276 | 1291 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
@@ -2183,10 +2198,13 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1 |
2183 | 2198 | ret void |
2184 | 2199 | } |
2185 | 2200 |
|
2186 | | -define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) #0 { |
| 2201 | +define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) { |
2187 | 2202 | ; GFX8-LABEL: sdiv_i8: |
2188 | 2203 | ; GFX8: ; %bb.0: |
2189 | 2204 | ; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10 |
| 2205 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2206 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 2207 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
2190 | 2208 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
2191 | 2209 | ; GFX8-NEXT: s_bfe_i32 s0, s4, 0x80008 |
2192 | 2210 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31 |
@@ -2328,10 +2346,13 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out |
2328 | 2346 | ret void |
2329 | 2347 | } |
2330 | 2348 |
|
2331 | | -define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) #0 { |
| 2349 | +define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) { |
2332 | 2350 | ; GFX8-LABEL: sdivrem_v2i8: |
2333 | 2351 | ; GFX8: ; %bb.0: |
2334 | 2352 | ; GFX8-NEXT: s_load_dword s2, s[8:9], 0x10 |
| 2353 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2354 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 2355 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
2335 | 2356 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
2336 | 2357 | ; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80010 |
2337 | 2358 | ; GFX8-NEXT: s_ashr_i32 s3, s0, 31 |
@@ -2592,10 +2613,13 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) |
2592 | 2613 | ret void |
2593 | 2614 | } |
2594 | 2615 |
|
2595 | | -define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) #0 { |
| 2616 | +define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) { |
2596 | 2617 | ; GFX8-LABEL: sdiv_i16: |
2597 | 2618 | ; GFX8: ; %bb.0: |
2598 | 2619 | ; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10 |
| 2620 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2621 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 2622 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
2599 | 2623 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
2600 | 2624 | ; GFX8-NEXT: s_bfe_i32 s0, s4, 0x100010 |
2601 | 2625 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31 |
@@ -2737,10 +2761,13 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou |
2737 | 2761 | ret void |
2738 | 2762 | } |
2739 | 2763 |
|
2740 | | -define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) #0 { |
| 2764 | +define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) { |
2741 | 2765 | ; GFX8-LABEL: sdivrem_v2i16: |
2742 | 2766 | ; GFX8: ; %bb.0: |
2743 | 2767 | ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x10 |
| 2768 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 2769 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
| 2770 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
2744 | 2771 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
2745 | 2772 | ; GFX8-NEXT: s_sext_i32_i16 s0, s3 |
2746 | 2773 | ; GFX8-NEXT: s_ashr_i32 s10, s0, 31 |
@@ -2998,10 +3025,13 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1 |
2998 | 3025 | ret void |
2999 | 3026 | } |
3000 | 3027 |
|
3001 | | -define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) #0 { |
| 3028 | +define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) { |
3002 | 3029 | ; GFX8-LABEL: sdivrem_i3: |
3003 | 3030 | ; GFX8: ; %bb.0: |
3004 | 3031 | ; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10 |
| 3032 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 3033 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 3034 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
3005 | 3035 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
3006 | 3036 | ; GFX8-NEXT: s_bfe_i32 s0, s4, 0x30008 |
3007 | 3037 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31 |
@@ -3149,10 +3179,13 @@ define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) % |
3149 | 3179 | ret void |
3150 | 3180 | } |
3151 | 3181 |
|
3152 | | -define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) #0 { |
| 3182 | +define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) { |
3153 | 3183 | ; GFX8-LABEL: sdivrem_i27: |
3154 | 3184 | ; GFX8: ; %bb.0: |
3155 | 3185 | ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 |
| 3186 | +; GFX8-NEXT: s_add_i32 s12, s12, s17 |
| 3187 | +; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s13 |
| 3188 | +; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 |
3156 | 3189 | ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
3157 | 3190 | ; GFX8-NEXT: s_bfe_i32 s0, s5, 0x1b0000 |
3158 | 3191 | ; GFX8-NEXT: s_ashr_i32 s5, s0, 31 |
@@ -3299,5 +3332,3 @@ define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) |
3299 | 3332 | store i27 %rem, ptr addrspace(1) %out1 |
3300 | 3333 | ret void |
3301 | 3334 | } |
3302 | | - |
3303 | | -attributes #0 = { "amdgpu-no-flat-scratch-init" } |
|
0 commit comments