Description
Hi,
This issue can almost be considered a clone of #115989, except that the example used here has affine scalar loads and stores instead of their affine vector counterparts. Incorrect memref sizes are generated after fusing the loops below:
// fusion_test.mlir
module {
func.func @main(%arg0: memref<2x4xf32>, %arg1: memref<1x2x4xf32>) {
%alloc = memref.alloc() : memref<1x2x4xf32>
affine.for %i = 0 to 1 {
%0 = affine.load %arg0[0, 0] : memref<2x4xf32>
%1 = affine.load %arg0[0, 1] : memref<2x4xf32>
%2 = affine.load %arg0[0, 2] : memref<2x4xf32>
%3 = affine.load %arg0[0, 3] : memref<2x4xf32>
%4 = affine.load %arg0[1, 0] : memref<2x4xf32>
%5 = affine.load %arg0[1, 1] : memref<2x4xf32>
%6 = affine.load %arg0[1, 2] : memref<2x4xf32>
%7 = affine.load %arg0[1, 3] : memref<2x4xf32>
affine.store %0, %alloc[0, 0, 0] : memref<1x2x4xf32>
affine.store %1, %alloc[0, 0, 1] : memref<1x2x4xf32>
affine.store %2, %alloc[0, 0, 2] : memref<1x2x4xf32>
affine.store %3, %alloc[0, 0, 3] : memref<1x2x4xf32>
affine.store %4, %alloc[0, 1, 0] : memref<1x2x4xf32>
affine.store %5, %alloc[0, 1, 1] : memref<1x2x4xf32>
affine.store %6, %alloc[0, 1, 2] : memref<1x2x4xf32>
affine.store %7, %alloc[0, 1, 3] : memref<1x2x4xf32>
}
affine.for %i = 0 to 2 {
affine.for %j = 0 to 4 {
%8 = affine.load %alloc[0, %i, %j] : memref<1x2x4xf32>
%9 = arith.negf %8 : f32
affine.store %9, %arg1[0, %i, %j] : memref<1x2x4xf32>
}
}
return
}
}
$> mlir-opt --affine-loop-fusion fusion_test.mlir
results in the following output:
module {
func.func @main(%arg0: memref<2x4xf32>, %arg1: memref<1x2x4xf32>) {
%alloc = memref.alloc() : memref<1x1x1xf32>
affine.for %arg2 = 0 to 2 {
affine.for %arg3 = 0 to 4 {
%0 = affine.load %arg0[0, 0] : memref<2x4xf32>
%1 = affine.load %arg0[0, 1] : memref<2x4xf32>
%2 = affine.load %arg0[0, 2] : memref<2x4xf32>
%3 = affine.load %arg0[0, 3] : memref<2x4xf32>
%4 = affine.load %arg0[1, 0] : memref<2x4xf32>
%5 = affine.load %arg0[1, 1] : memref<2x4xf32>
%6 = affine.load %arg0[1, 2] : memref<2x4xf32>
%7 = affine.load %arg0[1, 3] : memref<2x4xf32>
affine.store %0, %alloc[0, 0, 0] : memref<1x1x1xf32>
affine.store %1, %alloc[0, 0, 1] : memref<1x1x1xf32>
affine.store %2, %alloc[0, 0, 2] : memref<1x1x1xf32>
affine.store %3, %alloc[0, 0, 3] : memref<1x1x1xf32>
affine.store %4, %alloc[0, 1, 0] : memref<1x1x1xf32>
affine.store %5, %alloc[0, 1, 1] : memref<1x1x1xf32>
affine.store %6, %alloc[0, 1, 2] : memref<1x1x1xf32>
affine.store %7, %alloc[0, 1, 3] : memref<1x1x1xf32>
%8 = affine.load %alloc[0, %arg2, %arg3] : memref<1x1x1xf32>
%9 = arith.negf %8 : f32
affine.store %9, %arg1[0, %arg2, %arg3] : memref<1x2x4xf32>
}
}
return
}
}
Here, the type of %alloc changed from memref<1x2x4xf32> to memref<1x1x1xf32>, which is incorrect: the stores into %alloc (e.g. %alloc[0, 1, 3]) are now out of bounds for the shrunken allocation. The memref privatization code in the fusion pass needs fixing.