@@ -255,7 +255,15 @@ struct PackOpTiling
255255 ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
256256 SmallVectorImpl<OpFoldResult> &resultOffsets,
257257 SmallVectorImpl<OpFoldResult> &resultSizes) const {
258+ if (operandNumber != 0 )
259+ return failure ();
260+
258261 auto packOp = cast<PackOp>(op);
262+ // It is not trivial to infer the dest tile from the source tile if
263+ // `packOp` has padding semantics.
264+ if (packOp.getPaddingValue ())
265+ return failure ();
266+
259267 Location loc = packOp.getLoc ();
260268
261269 SmallVector<OpFoldResult> outerDimOffsets, outerDimSizes;
@@ -269,7 +277,20 @@ struct PackOpTiling
269277 /* stopCondition=*/ nullptr , /* closedUB=*/ true );
270278 std::optional<int64_t > cstInnerSize =
271279 getConstantIntValue (dimAndTileMapping[dim]);
272- // Currently only expect perfect tiling cases.
280+ // Currently fusing `packOp` as a consumer only supports the perfect
281+ // tiling scenario because, even without padding semantics, the `packOp`
282+ // may still yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>,
283+ // where the `tileSize` from the operand of `packOp` is 5, which is not
284+ // a multiple of the `innerTile` (=6) of `packOp`. As a result:
285+ // 1. the first slice is extracted from (0) to (4) and inserted into
286+ // (0,0)~(0,4) in the first row.
287+ // 2. the second slice is extracted from (5) to (9) and SHOULD BE
288+ // split across two rows with different lengths: (0,5) in the first
289+ // row and (1,0)~(1,3) in the second row. It is hard to coordinate
290+ // them, so the constraint below bypasses such cases for now. In
291+ // other words, we can only support tiling with the consumer if the tile
292+ // size for the producer is a multiple of the inner tile size for the
293+ // packed dimensions at this moment.
273294 if (failed (cstSize) || !cstInnerSize || *cstSize % *cstInnerSize != 0 ) {
274295 return failure ();
275296 }
@@ -299,6 +320,9 @@ struct PackOpTiling
299320 FailureOr<TilingResult> getTiledImplementationFromOperandTile (
300321 Operation *op, OpBuilder &b, unsigned operandNumber,
301322 ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes) const {
323+ if (operandNumber != 0 )
324+ return failure ();
325+
302326 auto packOp = cast<PackOp>(op);
303327 Location loc = packOp.getLoc ();
304328
@@ -326,8 +350,7 @@ struct PackOpTiling
326350 loc, packOp.getDest (), outputOffsets, outputSizes, strides);
327351 tiledOperands.push_back (extractSlice);
328352
329- if (auto val = packOp.getPaddingValue ())
330- tiledOperands.push_back (val);
353+ assert (!packOp.getPaddingValue () && " Expect no padding semantic" );
331354 for (auto tile : packOp.getInnerTiles ())
332355 tiledOperands.push_back (tile);
333356
0 commit comments