@@ -261,14 +261,14 @@ gpu.module @unroll_full {
261
261
262
262
// UNROLL-FULL-LABEL: func @thread_partial_execution
263
263
func.func @thread_partial_execution () {
264
- %0 = arith.constant 0 :index
265
- %1 = arith.constant 2 : index
264
+ %c0 = arith.constant 0 :index
265
+ %c2 = arith.constant 2 : index
266
266
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
267
- gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
268
- threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
269
- affine.for %iv = %tx to 3 step 2 iter_args (%arg = %0 ) -> index {
270
- %3 = arith.addi %arg , %0 : index
271
- affine.yield %3 : index
267
+ gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %c2 , %sz_by = %c2 , %sz_bz = %c2 )
268
+ threads (%tx , %ty , %tz ) in (%sz_tx = %c2 , %sz_ty = %c2 , %sz_tz = %c2 ) {
269
+ affine.for %iv = %tx to 3 step 2 iter_args (%arg = %c0 ) -> index {
270
+ %sum = arith.addi %arg , %c0 : index
271
+ affine.yield %sum : index
272
272
}
273
273
// UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274
274
// UNROLL-FULL-NEXT: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
@@ -281,15 +281,15 @@ func.func @thread_partial_execution() {
281
281
282
282
// UNROLL-FULL-LABEL: func @unroll_all_thread
283
283
func.func @unroll_all_thread () {
284
- %0 = arith.constant 0 :index
285
- %1 = arith.constant 2 : index
284
+ %c0 = arith.constant 0 :index
285
+ %c2 = arith.constant 2 : index
286
286
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
287
- gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
288
- threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
287
+ gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %c2 , %sz_by = %c2 , %sz_bz = %c2 )
288
+ threads (%tx , %ty , %tz ) in (%sz_tx = %c2 , %sz_ty = %c2 , %sz_tz = %c2 ) {
289
289
%threadid = gpu.thread_id x
290
- %4 = affine.for %iv = %threadid to 6 step 2 iter_args (%arg = %0 ) -> index {
291
- %3 = arith.addi %arg , %0 : index
292
- affine.yield %3 : index
290
+ affine.for %iv = %threadid to 6 step 2 iter_args (%arg = %c0 ) -> index {
291
+ %sum = arith.addi %arg , %c0 : index
292
+ affine.yield %sum : index
293
293
}
294
294
// UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
295
295
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
@@ -301,15 +301,15 @@ func.func @unroll_all_thread() {
301
301
302
302
// UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
303
303
func.func @partial_unroll_factor_4 () {
304
- %0 = arith.constant 0 :index
305
- %1 = arith.constant 2 : index
304
+ %c0 = arith.constant 0 :index
305
+ %c2 = arith.constant 2 : index
306
306
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
307
- gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
308
- threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
307
+ gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %c2 , %sz_by = %c2 , %sz_bz = %c2 )
308
+ threads (%tx , %ty , %tz ) in (%sz_tx = %c2 , %sz_ty = %c2 , %sz_tz = %c2 ) {
309
309
%threadid = gpu.thread_id x
310
- affine.for %iv = %threadid to 9 step 2 iter_args (%arg = %0 ) -> index {
311
- %3 = arith.addi %arg , %0 : index
312
- affine.yield %3 : index
310
+ affine.for %iv = %threadid to 9 step 2 iter_args (%arg = %c0 ) -> index {
311
+ %sum = arith.addi %arg , %c0 : index
312
+ affine.yield %sum : index
313
313
}
314
314
gpu.terminator
315
315
}
@@ -769,15 +769,15 @@ func.func @unroll_with_iter_args_and_promotion(%arg0 : f32, %arg1 : f32) -> f32
769
769
770
770
// UNROLL-BY-4-LABEL: func @gpu_launch_unroll_by_factor_4
771
771
func.func @gpu_launch_unroll_by_factor_4 () {
772
- %0 = arith.constant 0 :index
773
- %1 = arith.constant 2 : index
772
+ %c0 = arith.constant 0 :index
773
+ %c2 = arith.constant 2 : index
774
774
// UNROLL-BY-4: %[[C0:.*]] = arith.constant 0 : index
775
- gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
776
- threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
775
+ gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %c2 , %sz_by = %c2 , %sz_bz = %c2 )
776
+ threads (%tx , %ty , %tz ) in (%sz_tx = %c2 , %sz_ty = %c2 , %sz_tz = %c2 ) {
777
777
%threadid = gpu.thread_id x
778
- affine.for %iv = %threadid to 11 step 2 iter_args (%arg = %0 ) -> index {
779
- %3 = arith.addi %arg , %0 : index
780
- affine.yield %3 : index
778
+ affine.for %iv = %threadid to 11 step 2 iter_args (%arg = %c0 ) -> index {
779
+ %sum = arith.addi %arg , %c0 : index
780
+ affine.yield %sum : index
781
781
}
782
782
gpu.terminator
783
783
}
0 commit comments