diff --git a/src/Conversion/ONNXToKrnl/Math/Clip.cpp b/src/Conversion/ONNXToKrnl/Math/Clip.cpp
index 89c0e5f023cc..4ec6c6b2efc6 100644
--- a/src/Conversion/ONNXToKrnl/Math/Clip.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Clip.cpp
@@ -28,73 +28,67 @@ struct ONNXClipOpLowering : public ConversionPattern {
   LogicalResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const final {
     Location loc = op->getLoc();
-    Value input = operands[0];
-    Value min = operands[1];
-    Value max = operands[2];
+    ONNXClipOp clipOp = cast<ONNXClipOp>(op);
+    MemRefType memRefType = convertToMemRefType(*op->result_type_begin());

-    // Insert an allocation and deallocation for the result of this operation.
-    auto memRefType = convertToMemRefType(*op->result_type_begin());
-
-    Value alloc;
-    bool insertDealloc = checkInsertDealloc(op);
+    ONNXClipOpAdaptor operandAdaptor(operands);
+    ONNXClipOpShapeHelper shapeHelper(&clipOp, &rewriter,
+        getDenseElementAttributeFromKrnlValue,
+        loadDenseElementArrayValueAtIndex);
+    auto shapeComputed = shapeHelper.computeShape(operandAdaptor);
+    assert(succeeded(shapeComputed));

-    if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
-    else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, input);
+    Value input = operandAdaptor.input();
+    Value min = operandAdaptor.min();
+    Value max = operandAdaptor.max();

-    SmallVector<Value, 4> loopIVs;
-    // Only create krnl.iterate if one of the operands is not scalar tensor.
+    // Insert an allocation and deallocation for the result of this operation.
+    bool insertDealloc = checkInsertDealloc(op);
+    Value alloc =
+        (hasAllConstantDimensions(memRefType))
+            ? insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc)
+            : insertAllocAndDealloc(
+                  memRefType, loc, rewriter, insertDealloc, input);
+
+    auto computeResult =
+        [&](MultiDialectBuilder<KrnlBuilder, MathBuilder> &create,
+            const ValueRange &indices) {
+          Value loadedVal = create.krnl.load(input, indices);
+          Value res = loadedVal;
+          if (!min.getType().isa<NoneType>()) {
+            Value minVal = create.krnl.load(min);
+            Value lessThanMin = create.math.slt(res, minVal);
+            res = create.math.select(lessThanMin, minVal, res);
+          }
+          if (!max.getType().isa<NoneType>()) {
+            Value maxVal = create.krnl.load(max);
+            Value lessThanMax = create.math.slt(res, maxVal);
+            res = create.math.select(lessThanMax, res, maxVal);
+          }
+          create.krnl.store(res, alloc, indices);
+        };
+
+    // Create a loop only if one of the operands is not a scalar tensor.
     if (!hasAllScalarValues(operands)) {
-      // Create iterateOp & get block within iterate op.
-      BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
-      loops.createDefineAndIterateOp(input);
-      Block *iterationBlock = loops.getIterateBlock();
-
-      // Insert instructions inside the KernelIterateOp body.
-      rewriter.setInsertionPointToStart(iterationBlock);
-
-      // Handle the operation:
-      for (auto arg : iterationBlock->getArguments())
-        loopIVs.push_back(arg);
-    }
-
-    // Load unary first operand.
-    MultiDialectBuilder<KrnlBuilder, MathBuilder> create(rewriter, loc);
-    Value loadedVal = create.krnl.load(input, loopIVs);
-    Type inputType = loadedVal.getType();
-    Value res = loadedVal;
-
-    if (inputType.isa<FloatType>()) {
-      if (!min.getType().isa<NoneType>()) {
-        Value minVal = create.krnl.load(min);
-        Value lessThanMin = create.math.slt(res, minVal);
-        res = create.math.select(lessThanMin, minVal, res);
-      }
-      if (!max.getType().isa<NoneType>()) {
-        Value maxVal = create.krnl.load(max);
-        Value lessThanMax = create.math.slt(res, maxVal);
-        res = create.math.select(lessThanMax, res, maxVal);
-      }
-    } else if (inputType.isa<IntegerType>()) {
-      if (!min.getType().isa<NoneType>()) {
-        Value minVal = create.krnl.load(min);
-        Value lessThanMin = create.math.slt(res, minVal);
-        res = create.math.select(lessThanMin, minVal, res);
-      }
-      if (!max.getType().isa<NoneType>()) {
-        Value maxVal = create.krnl.load(max);
-        Value lessThanMax = create.math.slt(res, maxVal);
-        res = create.math.select(lessThanMax, res, maxVal);
-      }
+      KrnlBuilder createKrnl(rewriter, loc);
+      uint64_t numLoops = memRefType.getRank();
+      ValueRange loopDef = createKrnl.defineLoops(numLoops);
+
+      SmallVector<IndexExpr, 4> lbs(numLoops, LiteralIndexExpr(0));
+      SmallVector<IndexExpr, 4> ubs;
+      for (uint64_t i = 0; i < numLoops; ++i)
+        ubs.emplace_back(shapeHelper.dimsForOutput()[i]);
+
+      createKrnl.iterateIE(loopDef, loopDef, lbs, ubs,
+          [&](KrnlBuilder &createKrnl, ValueRange indices) {
+            MultiDialectBuilder<KrnlBuilder, MathBuilder> create(createKrnl);
+            computeResult(create, indices);
+          });
     } else {
-      llvm_unreachable("unsupported element type");
+      MultiDialectBuilder<KrnlBuilder, MathBuilder> create(rewriter, loc);
+      computeResult(create, {});
     }
-    // Store result in the resulting array.
-    create.krnl.store(res, alloc, loopIVs);
-
     rewriter.replaceOp(op, alloc);
     return success();
   }
diff --git a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp
index 3d2aeca41c7e..851fc4cb2bc8 100644
--- a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp
+++ b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp
@@ -21,12 +21,13 @@ template <typename Adaptor, typename Op, typename ShapeHelper>
 LogicalResult ONNXSplitOpLoweringCommon(Operation *op, ArrayRef<Value> operands,
     ConversionPatternRewriter &rewriter) {
   // Gather info.
-  auto loc = op->getLoc();
+  Location loc = op->getLoc();
   Adaptor operandAdaptor(operands);
-  Op splitOp = llvm::dyn_cast<Op>(op);
-  auto rank = splitOp.input().getType().template cast<ShapedType>().getRank();
-  auto outputNum = splitOp.getNumResults();
-  auto axis = splitOp.axis();
+  Op splitOp = cast<Op>(op);
+  uint64_t rank =
+      splitOp.input().getType().template cast<ShapedType>().getRank();
+  unsigned outputNum = splitOp.getNumResults();
+  unsigned axis = splitOp.axis();

   // Get a shape helper.
   ShapeHelper shapeHelper(&splitOp, &rewriter,
@@ -36,7 +37,7 @@ LogicalResult ONNXSplitOpLoweringCommon(Operation *op, ArrayRef<Value> operands,

   // Alloc and dealloc.
   SmallVector<Value, 4> allocs;
-  for (unsigned int i = 0; i < outputNum; ++i) {
+  for (unsigned i = 0; i < outputNum; ++i) {
     checkInsertDealloc(op, i);
     auto memRefType = convertToMemRefType(splitOp.outputs()[i].getType());
     Value alloc = insertAllocAndDeallocSimple(
@@ -45,40 +46,44 @@ LogicalResult ONNXSplitOpLoweringCommon(Operation *op, ArrayRef<Value> operands,

   // Creates loops, one for each output.
-  for (unsigned int i = 0; i < outputNum; ++i) {
+  for (unsigned i = 0; i < outputNum; ++i) {
     OpBuilder::InsertionGuard insertGuard(rewriter);
-    // Create loop.
- BuildKrnlLoop outputLoops(rewriter, loc, rank); - outputLoops.createDefineAndIterateOp(allocs[i]); - rewriter.setInsertionPointToStart(outputLoops.getIterateBlock()); // Scope for krnl ops IndexExprScope childScope(&rewriter, shapeHelper.scope); + KrnlBuilder createKrnl(rewriter, loc); + ValueRange loopDef = createKrnl.defineLoops(rank); + SmallVector lbs(rank, LiteralIndexExpr(0)); + + MemRefBoundsIndexCapture allocsBounds(allocs[i]); + SmallVector ubs; + allocsBounds.getDimList(ubs); + + createKrnl.iterateIE(loopDef, loopDef, lbs, ubs, + [&](KrnlBuilder &createKrnl, ValueRange indices) { + SmallVector readIndices; + for (uint64_t r = 0; r < rank; ++r) { + DimIndexExpr readIndex(indices[r]); + // Compute read index for the split axis. + if (r == axis) + for (unsigned k = 0; k < i; ++k) { + SymbolIndexExpr splitDim(shapeHelper.dimsForOutput(k)[r]); + readIndex = readIndex + splitDim; + } - // Indices for the read and write. - SmallVector readIndices; - SmallVector writeIndices; - for (int r = 0; r < rank; ++r) { - Value readVal = outputLoops.getInductionVar(r); - // If not the split axis, same index for read and write - IndexExpr readIndex = DimIndexExpr(readVal); - DimIndexExpr writeIndex(readVal); - // If the split axis, compute read index for the split axis. - if (r == axis) { - for (unsigned int k = 0; k < i; ++k) { - IndexExpr splitDim = SymbolIndexExpr(shapeHelper.dimsForOutput(k)[r]); - readIndex = readIndex + splitDim; - } - } - readIndices.emplace_back(readIndex); - writeIndices.emplace_back(writeIndex); - } - // Insert copy. - Value loadData = createKrnl.loadIE(operandAdaptor.input(), readIndices); - createKrnl.storeIE(loadData, allocs[i], writeIndices); + readIndices.emplace_back(readIndex); + } + + // Insert copy. + Value loadData = + createKrnl.loadIE(operandAdaptor.input(), readIndices); + createKrnl.store(loadData, allocs[i], indices); + }); } + rewriter.replaceOp(op, allocs); + return success(); } diff --git a/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp b/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp index 82fa6ec4e64b..c2cad4c7444c 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp @@ -71,48 +71,35 @@ struct ONNXTileOpLowering : public ConversionPattern { (void)shapecomputed; assert(!failed(shapecomputed) && "expected to succeed"); - MemRefType outputMemRefType = convertToMemRefType(*op->result_type_begin()); - auto outputMemRefShape = outputMemRefType.getShape(); - int64_t outputRank = outputMemRefShape.size(); + MemRefType memRefType = convertToMemRefType(*op->result_type_begin()); + llvm::ArrayRef memRefShape = memRefType.getShape(); + uint64_t outputRank = memRefShape.size(); Value input = operandAdaptor.input(); - Value alloc = insertAllocAndDeallocSimple( - rewriter, op, outputMemRefType, loc, shapeHelper.dimsForOutput(0)); - - // Define loops and iteration trip counts (equivalent to size of output) - BuildKrnlLoop outputLoops(rewriter, loc, outputRank); - outputLoops.createDefineOp(); - outputLoops.pushAllBounds(shapeHelper.dimsForOutput(0)); - outputLoops.createIterateOp(); - rewriter.setInsertionPointToStart(outputLoops.getIterateBlock()); - - SmallVector loadIndices; - // This implementation is to iterate the output tensor. - // The store has simple affine subscript expression. - // Alternative implementation is to iterate the input tensor and repeats. - // The load of elements in input tensor can be reused explicitly. - // But the subscript of store is not contigious, or even not affine. 
- // Alternative implementation can be found at the end of this file. - - for (int64_t i = 0; i < outputRank; i++) { - // Scope is created for each dimension because they are independent - IndexExprScope IEScope(&rewriter, loc); - DimIndexExpr index(outputLoops.getInductionVar(i)); - MemRefBoundsIndexCapture inputBounds(input); - DimIndexExpr dimSize(inputBounds.getDim(i)); - IndexExpr exprVal = index % dimSize; - loadIndices.emplace_back(exprVal.getValue()); - } - - MultiDialectBuilder create( - rewriter, loc); - Value loadVal = create.krnl.load(input, loadIndices); - - SmallVector storeIndices; - for (int64_t i = 0; i < outputRank; ++i) - storeIndices.emplace_back(outputLoops.getInductionVar(i)); - create.krnl.store(loadVal, alloc, storeIndices); + rewriter, op, memRefType, loc, shapeHelper.dimsForOutput()); + + KrnlBuilder createKrnl(rewriter, loc); + ValueRange loopDef = createKrnl.defineLoops(outputRank); + SmallVector lbs(outputRank, LiteralIndexExpr(0)); + + MemRefBoundsIndexCapture inputBounds(input); + createKrnl.iterateIE(loopDef, loopDef, lbs, shapeHelper.dimsForOutput(), + [&](KrnlBuilder &createKrnl, ValueRange indices) { + // Compute the indices used by the input tensor load operation. + // Note: An alternative implementation can be found at the end of this + // file. + SmallVector loadIndices; + for (uint64_t i = 0; i < outputRank; ++i) { + DimIndexExpr index(indices[i]); + DimIndexExpr dimSize(inputBounds.getDim(i)); + IndexExpr exprVal = index % dimSize; + loadIndices.emplace_back(exprVal.getValue()); + } + + Value loadVal = createKrnl.load(input, loadIndices); + createKrnl.store(loadVal, alloc, indices); + }); rewriter.replaceOp(op, alloc); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp index 50af8e347049..b528280acc19 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp @@ -34,7 +34,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern { // Basic information. auto memRefType = convertToMemRefType(*op->result_type_begin()); - int64_t rank = memRefType.getShape().size(); + uint64_t rank = memRefType.getShape().size(); // Get a shape helper. ONNXTransposeOpShapeHelper shapeHelper(&transposeOp, &rewriter, @@ -46,32 +46,28 @@ struct ONNXTransposeOpLowering : public ConversionPattern { // Insert an allocation and deallocation for the result of this operation. Value alloc = insertAllocAndDeallocSimple( - rewriter, op, memRefType, loc, shapeHelper.dimsForOutput(0)); - - // Create loop. - BuildKrnlLoop inputLoops(rewriter, loc, rank); - inputLoops.createDefineAndIterateOp(data); - rewriter.setInsertionPointToStart(inputLoops.getIterateBlock()); - { - // Get a child IndexExpr context. - IndexExprScope childScope(&rewriter, shapeHelper.scope); - KrnlBuilder createKrnl(rewriter, loc); - - // Get read/write indices. - SmallVector readIndices; - SmallVector writeIndices; - for (decltype(rank) i = 0; i < rank; ++i) { - Value readVal = inputLoops.getInductionVar(i); - Value writeVal = - inputLoops.getInductionVar(ArrayAttrIntVal(permAttr, i)); - readIndices.emplace_back(DimIndexExpr(readVal)); - writeIndices.emplace_back(DimIndexExpr(writeVal)); - } - - // Copy data. 
- Value loadData = createKrnl.loadIE(data, readIndices); - createKrnl.storeIE(loadData, alloc, writeIndices); - } + rewriter, op, memRefType, loc, shapeHelper.dimsForOutput()); + + KrnlBuilder createKrnl(rewriter, loc); + ValueRange loopDef = createKrnl.defineLoops(rank); + SmallVector lbs(rank, LiteralIndexExpr(0)); + + MemRefBoundsIndexCapture dataBounds(data); + SmallVector ubs; + dataBounds.getDimList(ubs); + + createKrnl.iterateIE(loopDef, loopDef, lbs, ubs, + [&](KrnlBuilder &createKrnl, ValueRange indices) { + // Compute the indices used by the load operation. + SmallVector storeIndices; + for (uint64_t i = 0; i < rank; ++i) { + Value index = indices[ArrayAttrIntVal(permAttr, i)]; + storeIndices.emplace_back(DimIndexExpr(index)); + } + + Value loadData = createKrnl.load(data, indices); + createKrnl.storeIE(loadData, alloc, storeIndices); + }); rewriter.replaceOp(op, alloc); diff --git a/src/Dialect/ONNX/CMakeLists.txt b/src/Dialect/ONNX/CMakeLists.txt index dbe1a0a4319b..5a8efcf63185 100644 --- a/src/Dialect/ONNX/CMakeLists.txt +++ b/src/Dialect/ONNX/CMakeLists.txt @@ -24,6 +24,7 @@ add_onnx_mlir_library(OMONNXOps ShapeInference/ArgMax.cpp ShapeInference/AveragePool.cpp ShapeInference/CategoryMapper.cpp + ShapeInference/Clip.cpp ShapeInference/Compress.cpp ShapeInference/Concat.cpp ShapeInference/Conv.cpp diff --git a/src/Dialect/ONNX/ShapeInference/Clip.cpp b/src/Dialect/ONNX/ShapeInference/Clip.cpp new file mode 100644 index 000000000000..d91f4466e092 --- /dev/null +++ b/src/Dialect/ONNX/ShapeInference/Clip.cpp @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +//===--------------- Clip.cpp - Shape Inference for Clip Op ---------------===// +// +// Copyright 2022 The IBM Research Authors. +// +// ============================================================================= +// +// This file implements shape inference for the ONNX Clip operator. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/Dialect/ONNX/ONNXOpsHelper.hpp"
+#include "src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.hpp"
+
+ONNXClipOpShapeHelper::ONNXClipOpShapeHelper(ONNXClipOp *newOp)
+    : ONNXOpShapeHelper<ONNXClipOp>(
+          newOp, newOp->getOperation()->getNumResults()) {}
+
+ONNXClipOpShapeHelper::ONNXClipOpShapeHelper(ONNXClipOp *newOp,
+    OpBuilder *rewriter, ArrayValueIndexCapture::GetDenseVal fGetDenseVal,
+    ArrayValueIndexCapture::LoadVal fLoadVal)
+    : ONNXOpShapeHelper<ONNXClipOp>(newOp,
+          newOp->getOperation()->getNumResults(), rewriter, fGetDenseVal,
+          fLoadVal) {}
+
+LogicalResult ONNXClipOpShapeHelper::computeShape(
+    ONNXClipOpAdaptor operandAdaptor) {
+  Value input = operandAdaptor.input();
+  MemRefBoundsIndexCapture bounds(input);
+  int64_t rank = bounds.getRank();
+
+  DimsExpr outputDims(rank);
+  for (int64_t i = 0; i < rank; ++i)
+    outputDims[i] = bounds.getDim(i);
+  dimsForOutput() = outputDims;
+
+  return success();
+}
diff --git a/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.cpp b/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.cpp
index a0b8c67419e9..7323feaa3ece 100644
--- a/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.cpp
+++ b/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.cpp
@@ -405,6 +405,7 @@ LogicalResult ONNXGenericPoolShapeHelper::computeShape(
 template struct ONNXOpShapeHelper;
 template struct ONNXOpShapeHelper;
 template struct ONNXOpShapeHelper;
+template struct ONNXOpShapeHelper<ONNXClipOp>;
 template struct ONNXOpShapeHelper;
 template struct ONNXOpShapeHelper;
 template struct ONNXOpShapeHelper;
diff --git a/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.hpp b/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.hpp
index 840b0d7ba7e5..83a71dd6baa4 100644
--- a/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.hpp
+++ b/src/Dialect/ONNX/ShapeInference/ONNXShapeHelper.hpp
@@ -181,6 +181,15 @@ struct ONNXArgMaxOpShapeHelper : public ONNXOpShapeHelper<ONNXArgMaxOp> {
   LogicalResult computeShape(ONNXArgMaxOpAdaptor operandAdaptor);
 };

+// Shape for Clip.
+struct ONNXClipOpShapeHelper : public ONNXOpShapeHelper { + ONNXClipOpShapeHelper(ONNXClipOp *newOp); + ONNXClipOpShapeHelper(ONNXClipOp *newOp, OpBuilder *rewriter, + ArrayValueIndexCapture::GetDenseVal fGetDenseVal, + ArrayValueIndexCapture::LoadVal fLoadVal); + LogicalResult computeShape(ONNXClipOpAdaptor operandAdaptor); +}; + // Shape for concat struct ONNXConcatOpShapeHelper : public ONNXOpShapeHelper { ONNXConcatOpShapeHelper(ONNXConcatOp *newOp); diff --git a/test/mlir/onnx/onnx_lowering.mlir b/test/mlir/onnx/onnx_lowering.mlir index ce4c915535c9..36ed7dc325ec 100644 --- a/test/mlir/onnx/onnx_lowering.mlir +++ b/test/mlir/onnx/onnx_lowering.mlir @@ -1001,20 +1001,23 @@ func private @test_transpose(%arg0 : tensor<10x20x30x40xf32>) -> tensor<*xf32> { "std.return"(%1) : (tensor<*xf32>) -> () // CHECK-LABEL: test_transpose - // CHECK: [[RES1:%.+]] = memref.alloc() {{.*}}: memref<40x30x20x10xf32> - - // CHECK: [[DEF_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg2 = 0 to 20, [[DEF_LOOPS]]#2 -> %arg3 = 0 to 30, [[DEF_LOOPS]]#3 -> %arg4 = 0 to 40){ - // CHECK: [[LOAD:%.+]] = krnl.load %arg0[%arg1, %arg2, %arg3, %arg4] : memref<10x20x30x40xf32> - // CHECK: krnl.store [[LOAD]], [[RES1]][%arg4, %arg3, %arg2, %arg1] : memref<40x30x20x10xf32> - - // CHECK: [[RES0:%.+]] = memref.alloc() {{.*}}: memref<40x10x30x20xf32> - // CHECK: [[DEF_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 40, [[DEF_LOOPS]]#1 -> %arg2 = 0 to 30, [[DEF_LOOPS]]#2 -> %arg3 = 0 to 20, [[DEF_LOOPS]]#3 -> %arg4 = 0 to 10){ - // CHECK: [[LOAD:%.+]] = krnl.load [[RES1]][%arg1, %arg2, %arg3, %arg4] : memref<40x30x20x10xf32> - // CHECK: krnl.store [[LOAD]], [[RES0]][%arg1, %arg4, %arg2, %arg3] : memref<40x10x30x20xf32> - - // CHECK: return [[RES0]] : memref<40x10x30x20xf32> + // CHECK: [[RES1:%.+]] = memref.alloc() {{.*}}: memref<40x30x20x10xf32> + // CHECK: [[DEF_LOOPS:%.+]]:4 = krnl.define_loops 4 + // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 10, + // CHECK-SAME: [[DEF_LOOPS]]#1 -> %arg2 = 0 to 20, [[DEF_LOOPS]]#2 -> %arg3 = 0 to 30, [[DEF_LOOPS]]#3 -> %arg4 = 0 to 40){ + // CHECK-NEXT: [[IV:%.+]]:4 = krnl.get_induction_var_value([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3) : + // CHECK-SAME: (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index, index) + // CHECK: [[LOAD:%.+]] = krnl.load %arg0{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2, [[IV]]#3{{.}} : memref<10x20x30x40xf32> + // CHECK: krnl.store [[LOAD]], [[RES1]]{{.}}[[IV]]#3, [[IV]]#2, [[IV]]#1, [[IV]]#0{{.}} : memref<40x30x20x10xf32> + // CHECK: [[RES0:%.+]] = memref.alloc() {{.*}}: memref<40x10x30x20xf32> + // CHECK: [[DEF_LOOPS1:%.+]]:4 = krnl.define_loops 4 + // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1, [[DEF_LOOPS1]]#2, [[DEF_LOOPS1]]#3) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 40, + // CHECK-SAME: [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOPS1]]#2 -> %arg3 = 0 to 20, [[DEF_LOOPS1]]#3 -> %arg4 = 0 to 10){ + // CHECK-NEXT: [[IV1:%.+]]:4 = krnl.get_induction_var_value([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1, [[DEF_LOOPS1]]#2, [[DEF_LOOPS1]]#3) : + // CHECK-SAME: (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index, index) + // CHECK: [[LOAD:%.+]] = krnl.load 
[[RES1]]{{.}}[[IV1]]#0, [[IV1]]#1, [[IV1]]#2, [[IV1]]#3{{.}} : memref<40x30x20x10xf32> + // CHECK: krnl.store [[LOAD]], [[RES0]]{{.}}[[IV1]]#0, [[IV1]]#3, [[IV1]]#1, [[IV1]]#2{{.}} : memref<40x10x30x20xf32> + // CHECK: return [[RES0]] : memref<40x10x30x20xf32> } // ----- @@ -1023,19 +1026,25 @@ func private @test_transpose(%arg0 : tensor<10x20x30x40xf32>) -> tensor<*xf32> { func private @test_transpose_dynamic_dims(%arg0 : tensor<10x?x30x40xf32>) -> tensor<*xf32> { %0 = "onnx.Transpose"(%arg0) {perm = [0, 3, 1, 2]} : (tensor<10x?x30x40xf32>) -> tensor<*xf32> "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK: [[MAP:#.+]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func private @test_transpose_dynamic_dims - // CHECK-SAME: ([[PARAM_0_:%.+]]: memref<10x?x30x40xf32>) -> memref<10x40x?x30xf32> { - // CHECK: [[CST_1_:%.+]] = arith.constant 1 : index - // CHECK: [[DIM_0_:%.+]] = memref.dim [[PARAM_0_]], [[CST_1_]] : memref<10x?x30x40xf32> - // CHECK-DAG: [[RES_:%.+]] = memref.alloc([[DIM_0_]]) {{.*}}: memref<10x40x?x30xf32> - // CHECK-DAG: [[LOOP_0_:%.+]]:4 = krnl.define_loops 4 - // CHECK-DAG: [[CST_1_1_:%.+]] = arith.constant 1 : index - // CHECK: [[DIM_1_:%.+]] = memref.dim [[PARAM_0_]], [[CST_1_1_]] : memref<10x?x30x40xf32> - // CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2, [[LOOP_0_]]#3) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to 10, [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to [[DIM_1_]], [[LOOP_0_]]#2 -> [[I_2_:%.+]] = 0 to 30, [[LOOP_0_]]#3 -> [[I_3_:%.+]] = 0 to 40){ - // CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]], [[I_3_]]{{.}} : memref<10x?x30x40xf32> - // CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[I_0_]], [[I_3_]], [[I_1_]], [[I_2_]]{{.}} : memref<10x40x?x30xf32> + // CHECK-SAME: ([[PARAM_0:%.+]]: memref<10x?x30x40xf32>) -> memref<10x40x?x30xf32> { + // CHECK: [[CST_1:%.+]] = arith.constant 1 : index + // CHECK: [[DIM_0:%.+]] = memref.dim [[PARAM_0]], [[CST_1]] : memref<10x?x30x40xf32> + // CHECK-DAG: [[RES:%.+]] = memref.alloc([[DIM_0]]) {{.*}}: memref<10x40x?x30xf32> + // CHECK-DAG: [[LOOP_0:%.+]]:4 = krnl.define_loops 4 + // CHECK-DAG: [[CST_1_1:%.+]] = arith.constant 1 : index + // CHECK: [[DIM_1:%.+]] = memref.dim [[PARAM_0]], [[CST_1_1]] : memref<10x?x30x40xf32> + // CHECK: krnl.iterate([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2, [[LOOP_0]]#3) with ([[LOOP_0]]#0 -> [[I_0:%.+]] = 0 to 10, + // CHECK-SAME: [[LOOP_0]]#1 -> [[I_1:%.+]] = 0 to [[MAP]]{{.}}[[DIM_1]]{{.}}, [[LOOP_0]]#2 -> [[I_2:%.+]] = 0 to 30, + // CHECK-SAME: [[LOOP_0]]#3 -> [[I_3:%.+]] = 0 to 40){ + // CHECK-NEXT: [[IV:%.+]]:4 = krnl.get_induction_var_value([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2, [[LOOP_0]]#3) : + // CHECK-SAME: (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index, index) + // CHECK: [[LOAD_PARAM_0_MEM:%.+]] = krnl.load [[PARAM_0]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2, [[IV]]#3{{.}} : memref<10x?x30x40xf32> + // CHECK: krnl.store [[LOAD_PARAM_0_MEM]], [[RES]]{{.}}[[IV]]#0, [[IV]]#3, [[IV]]#1, [[IV]]#2{{.}} : memref<10x40x?x30xf32> // CHECK: } - // CHECK: return [[RES_]] : memref<10x40x?x30xf32> + // CHECK: return [[RES]] : memref<10x40x?x30xf32> // CHECK: } } @@ -1469,21 +1478,26 @@ func private @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 8)> // CHECK-LABEL: @test_split_equal - - // CHECK: [[RES_0:%.+]] = memref.alloc() {{.*}}: memref<8x32x64xf32> - // CHECK: [[RES_1:%.+]] = memref.alloc() {{.*}}: memref<8x32x64xf32> - // CHECK: 
[[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64){ - // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> - // CHECK: } - // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64){ - // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1) - // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> - // CHECK: } - // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32> + // CHECK: [[RES_0:%.+]] = memref.alloc() {{.*}}: memref<8x32x64xf32> + // CHECK: [[RES_1:%.+]] = memref.alloc() {{.*}}: memref<8x32x64xf32> + // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, + // CHECK-SAME: [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64){ + // CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) : + // CHECK-SAME: (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) + // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_0]], [[RES_0]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<8x32x64xf32> + // CHECK: } + // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, + // CHECK-SAME: [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64){ + // CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) : + // CHECK-SAME: (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) + // CHECK: [[INDEX:%.+]] = affine.apply [[INDEX_MAP]]{{.}}[[IV]]#0{{.}} + // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0{{.}}[[INDEX]], [[IV]]#1, [[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_1]], [[RES_1]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<8x32x64xf32> + // CHECK: } + // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32> } // ----- @@ -1500,14 +1514,16 @@ func private @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32 // CHECK: [[RES_1:%.+]] = memref.alloc() {{.*}}: memref<16x30x64xf32> // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64){ - // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32> + // CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) + // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0{{.}}[[IV]]#0, [[IV]]#1, 
[[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_0]], [[RES_0]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<16x2x64xf32> // CHECK: } // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64){ - // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2) - // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32> + // CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) + // CHECK: [[INDEX:%.+]] = affine.apply [[INDEX_MAP]]{{.}}[[IV]]#1{{.}} + // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0{{.}}[[IV]]#0, [[INDEX]], [[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_1]], [[RES_1]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<16x30x64xf32> // CHECK: } // CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32> } @@ -1525,14 +1541,16 @@ func private @test_splitv11_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32 // CHECK: [[RES_1:%.+]] = memref.alloc() {{.*}}: memref<8x32x64xf32> // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64){ - // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> + // CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) + // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_0]], [[RES_0]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<8x32x64xf32> // CHECK: } // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64){ - // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1) - // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> + // CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) + // CHECK: [[INDEX:%.+]] = affine.apply [[INDEX_MAP]]{{.}}[[IV]]#0{{.}} + // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0{{.}}[[INDEX]], [[IV]]#1, [[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_1]], [[RES_1]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<8x32x64xf32> // CHECK: } // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32> } @@ -1550,14 +1568,15 @@ func private @test_splitv11_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*x // CHECK: [[RES_1:%.+]] = memref.alloc() {{.*}}: memref<16x30x64xf32> // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with 
([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64){ - // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32> + // CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) + // CHECK: [[LOAD_0:%.+]] = krnl.load %arg0{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_0]], [[RES_0]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<16x2x64xf32> // CHECK: } // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64){ - // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2) - // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32> - // CHECK: krnl.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32> + // CHECK: [[INDEX:%.+]] = affine.apply [[INDEX_MAP]]{{.}}[[IV]]#1{{.}} + // CHECK: [[LOAD_1:%.+]] = krnl.load %arg0{{.}}[[IV]]#0, [[INDEX]], [[IV]]#2{{.}} : memref<16x32x64xf32> + // CHECK: krnl.store [[LOAD_1]], [[RES_1]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref<16x30x64xf32> // CHECK: } // CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32> } @@ -2019,20 +2038,21 @@ func private @test_clip(%arg0: tensor<3xf32>, %arg1: tensor, %arg2: tensor< return %0 : tensor<3xf32> // CHECK-LABEL: test_clip -// CHECK-SAME: ([[INPUT_:%.+]]: memref<3xf32>, [[MIN_:%.+]]: memref, [[MAX_:%.+]]: memref) -> memref<3xf32> attributes {input_names = ["x", "min", "max"], output_names = ["y"]} { -// CHECK-DAG: [[RES_:%.+]] = memref.alloc() {{.*}}: memref<3xf32> -// CHECK-DAG: [[LOOP_0_:%.+]] = krnl.define_loops 1 -// CHECK: krnl.iterate([[LOOP_0_]]) with ([[LOOP_0_]] -> [[I_0_:%.+]] = 0 to 3){ -// CHECK-DAG: [[LOAD_INPUT_MEM_:%.+]] = krnl.load [[INPUT_]]{{.}}[[I_0_]]{{.}} : memref<3xf32> -// CHECK-DAG: [[LOAD_MIN_MEM_:%.+]] = krnl.load [[MIN_]][] : memref -// CHECK: [[VAR_4_:%.+]] = arith.cmpf olt, [[LOAD_INPUT_MEM_]], [[LOAD_MIN_MEM_]] : f32 -// CHECK-DAG: [[VAR_5_:%.+]] = arith.select [[VAR_4_]], [[LOAD_MIN_MEM_]], [[LOAD_INPUT_MEM_]] : f32 -// CHECK-DAG: [[LOAD_MAX_MEM_:%.+]] = krnl.load [[MAX_]][] : memref -// CHECK: [[VAR_7_:%.+]] = arith.cmpf olt, [[VAR_5_]], [[LOAD_MAX_MEM_]] : f32 -// CHECK: [[VAR_8_:%.+]] = arith.select [[VAR_7_]], [[VAR_5_]], [[LOAD_MAX_MEM_]] : f32 -// CHECK: krnl.store [[VAR_8_]], [[RES_]]{{.}}[[I_0_]]{{.}} : memref<3xf32> +// CHECK-SAME: ([[INPUT:%.+]]: memref<3xf32>, [[MIN:%.+]]: memref, [[MAX:%.+]]: memref) -> memref<3xf32> attributes {input_names = ["x", "min", "max"], output_names = ["y"]} { +// CHECK-DAG: [[RES:%.+]] = memref.alloc() {{.*}}: memref<3xf32> +// CHECK-DAG: [[LOOP_0:%.+]] = krnl.define_loops 1 +// CHECK: krnl.iterate([[LOOP_0]]) with ([[LOOP_0]] -> [[I_0:%.+]] = 0 to 3){ +// CHECK-NEXT: [[IV:%.+]] = krnl.get_induction_var_value([[LOOP_0]]) : (!krnl.loop) -> index +// CHECK-DAG: [[LOAD_INPUT_MEM:%.+]] = krnl.load [[INPUT]]{{.}}[[IV]]{{.}} : memref<3xf32> +// CHECK-DAG: [[LOAD_MIN_MEM:%.+]] = krnl.load [[MIN]][] : memref +// CHECK: [[VAR_4:%.+]] = arith.cmpf olt, [[LOAD_INPUT_MEM]], [[LOAD_MIN_MEM]] : f32 +// CHECK-DAG: [[VAR_5:%.+]] = arith.select [[VAR_4]], [[LOAD_MIN_MEM]], 
[[LOAD_INPUT_MEM]] : f32 +// CHECK-DAG: [[LOAD_MAX_MEM:%.+]] = krnl.load [[MAX]][] : memref +// CHECK: [[VAR_7:%.+]] = arith.cmpf olt, [[VAR_5]], [[LOAD_MAX_MEM]] : f32 +// CHECK: [[VAR_8:%.+]] = arith.select [[VAR_7]], [[VAR_5]], [[LOAD_MAX_MEM]] : f32 +// CHECK: krnl.store [[VAR_8]], [[RES]]{{.}}[[IV]]{{.}} : memref<3xf32> // CHECK: } -// CHECK: return [[RES_]] : memref<3xf32> +// CHECK: return [[RES]] : memref<3xf32> // CHECK: } } @@ -2044,17 +2064,18 @@ func private @test_clip_default_min(%arg0: tensor<3xf32>, %arg1: tensor, %a return %0 : tensor<3xf32> // CHECK-LABEL: test_clip_default_min -// CHECK-SAME: ([[INPUT_:%.+]]: memref<3xf32>, [[MIN_:%.+]]: memref, [[MAX_:%.+]]: memref) -> memref<3xf32> attributes {input_names = ["x", "min", "max"], output_names = ["y"]} { -// CHECK-DAG: [[RES_:%.+]] = memref.alloc() {{.*}}: memref<3xf32> -// CHECK-DAG: [[LOOP_0_:%.+]] = krnl.define_loops 1 -// CHECK: krnl.iterate([[LOOP_0_]]) with ([[LOOP_0_]] -> [[I_0_:%.+]] = 0 to 3){ -// CHECK-DAG: [[LOAD_INPUT_MEM_:%.+]] = krnl.load [[INPUT_]]{{.}}[[I_0_]]{{.}} : memref<3xf32> -// CHECK-DAG: [[LOAD_MAX_MEM_:%.+]] = krnl.load [[MAX_]][] : memref -// CHECK: [[VAR_7_:%.+]] = arith.cmpf olt, [[LOAD_INPUT_MEM_]], [[LOAD_MAX_MEM_]] : f32 -// CHECK: [[VAR_8_:%.+]] = arith.select [[VAR_7_]], [[LOAD_INPUT_MEM_]], [[LOAD_MAX_MEM_]] : f32 -// CHECK: krnl.store [[VAR_8_]], [[RES_]]{{.}}[[I_0_]]{{.}} : memref<3xf32> +// CHECK-SAME: ([[INPUT:%.+]]: memref<3xf32>, [[MIN:%.+]]: memref, [[MAX:%.+]]: memref) -> memref<3xf32> attributes {input_names = ["x", "min", "max"], output_names = ["y"]} { +// CHECK-DAG: [[RES:%.+]] = memref.alloc() {{.*}}: memref<3xf32> +// CHECK-DAG: [[LOOP_0:%.+]] = krnl.define_loops 1 +// CHECK: krnl.iterate([[LOOP_0]]) with ([[LOOP_0]] -> [[I_0:%.+]] = 0 to 3){ +// CHECK-NEXT: [[IV:%.+]] = krnl.get_induction_var_value([[LOOP_0]]) : (!krnl.loop) -> index +// CHECK-DAG: [[LOAD_INPUT_MEM:%.+]] = krnl.load [[INPUT]]{{.}}[[IV]]{{.}} : memref<3xf32> +// CHECK-DAG: [[LOAD_MAX_MEM:%.+]] = krnl.load [[MAX]][] : memref +// CHECK: [[VAR_7:%.+]] = arith.cmpf olt, [[LOAD_INPUT_MEM]], [[LOAD_MAX_MEM]] : f32 +// CHECK: [[VAR_8:%.+]] = arith.select [[VAR_7]], [[LOAD_INPUT_MEM]], [[LOAD_MAX_MEM]] : f32 +// CHECK: krnl.store [[VAR_8]], [[RES]]{{.}}[[IV]]{{.}} : memref<3xf32> // CHECK: } -// CHECK: return [[RES_]] : memref<3xf32> +// CHECK: return [[RES]] : memref<3xf32> // CHECK: } } diff --git a/test/mlir/onnx/onnx_lowering_with_canonicalize.mlir b/test/mlir/onnx/onnx_lowering_with_canonicalize.mlir index 7e8be5a9c66e..1ca08f37824f 100644 --- a/test/mlir/onnx/onnx_lowering_with_canonicalize.mlir +++ b/test/mlir/onnx/onnx_lowering_with_canonicalize.mlir @@ -356,17 +356,20 @@ func @test_tile1(%arg0 : tensor<4x8xf32>) -> tensor<*xf32> { %1 = "onnx.Tile"(%arg0, %0) : (tensor<4x8xf32>, tensor<2xi64>) -> tensor<*xf32> return %1 : tensor<*xf32> +// CHECK-DAG: [[MAP0:#map.+]] = affine_map<(d0) -> (d0 mod 4)> +// CHECK-DAG: [[MAP1:#map.+]] = affine_map<(d0) -> (d0 mod 8)> // CHECK-LABEL: func @test_tile1 -// CHECK-SAME: ([[PARAM_0_:%.+]]: memref<4x8xf32>) -> memref<12x16xf32> { -// CHECK-DAG: [[RES_:%.+]] = memref.alloc() {{.*}}: memref<12x16xf32> -// CHECK-DAG: [[LOOP_0_:%.+]]:2 = krnl.define_loops 2 -// CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to 12, [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to 16){ -// CHECK-DAG: [[VAR_3_:%.+]] = affine.apply #map0([[I_0_]]) -// CHECK-DAG: [[VAR_4_:%.+]] = affine.apply #map1([[I_1_]]) -// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load 
[[PARAM_0_]]{{.}}[[VAR_3_]], [[VAR_4_]]{{.}} : memref<4x8xf32> -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[I_0_]], [[I_1_]]{{.}} : memref<12x16xf32> +// CHECK-SAME: ([[PARAM_0:%.+]]: memref<4x8xf32>) -> memref<12x16xf32> { +// CHECK-DAG: [[RES:%.+]] = memref.alloc() {{.*}}: memref<12x16xf32> +// CHECK-DAG: [[LOOP_0:%.+]]:2 = krnl.define_loops 2 +// CHECK: krnl.iterate([[LOOP_0]]#0, [[LOOP_0]]#1) with ([[LOOP_0]]#0 -> [[I_0:%.+]] = 0 to 12, [[LOOP_0]]#1 -> [[I_1:%.+]] = 0 to 16){ +// CHECK-NEXT: [[IV:%.+]]:2 = krnl.get_induction_var_value([[LOOP_0]]#0, [[LOOP_0]]#1) : (!krnl.loop, !krnl.loop) -> (index, index) +// CHECK-DAG: [[VAR_3:%.+]] = affine.apply [[MAP0]]([[IV]]#0) +// CHECK-DAG: [[VAR_4:%.+]] = affine.apply [[MAP1]]([[IV]]#1) +// CHECK: [[LOAD_PARAM_0_MEM:%.+]] = krnl.load [[PARAM_0]]{{.}}[[VAR_3]], [[VAR_4]]{{.}} : memref<4x8xf32> +// CHECK: krnl.store [[LOAD_PARAM_0_MEM]], [[RES]]{{.}}[[IV]]#0, [[IV]]#1{{.}} : memref<12x16xf32> // CHECK: } -// CHECK: return [[RES_]] : memref<12x16xf32> +// CHECK: return [[RES]] : memref<12x16xf32> // CHECK: } } @@ -377,20 +380,23 @@ func @test_tile2(%arg0 : tensor<8xf32>, %arg1 : tensor<1xi64>) -> tensor<*xf32> %1 = "onnx.Tile"(%arg0, %arg1) : (tensor<8xf32>, tensor<1xi64>) -> tensor<*xf32> return %1 : tensor<*xf32> +// CHECK-DAG: [[MAP0:#map.+]] = affine_map<()[s0] -> (s0 * 8)> +// CHECK-DAG: [[MAP1:#map.+]] = affine_map<(d0) -> (d0 mod 8)> // CHECK-LABEL: func @test_tile2 -// CHECK-SAME: ([[PARAM_0_:%.+]]: memref<8xf32>, [[PARAM_1_:%.+]]: memref<1xi64>) -> memref { -// CHECK-DAG: [[CST_0_:%.+]] = arith.constant 0 : index -// CHECK: [[LOAD_PARAM_1_MEM_:%.+]] = krnl.load [[PARAM_1_]]{{\[}}[[CST_0_]]{{\]}} : memref<1xi64> -// CHECK: [[VAR_1_:%.+]] = arith.index_cast [[LOAD_PARAM_1_MEM_]] : i64 to index -// CHECK: [[VAR_2_:%.+]] = affine.apply #map0(){{.}}[[VAR_1_]]{{.}} -// CHECK-DAG: [[RES_:%.+]] = memref.alloc([[VAR_2_]]) {{.*}} : memref -// CHECK-DAG: [[LOOP_0_:%.+]] = krnl.define_loops 1 -// CHECK: krnl.iterate([[LOOP_0_]]) with ([[LOOP_0_]] -> [[I_0_:%.+]] = 0 to [[VAR_2_]]){ -// CHECK: [[VAR_5_:%.+]] = affine.apply #map1([[I_0_]]) -// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[VAR_5_]]{{.}} : memref<8xf32> -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[I_0_]]{{.}} : memref +// CHECK-SAME: ([[PARAM_0:%.+]]: memref<8xf32>, [[PARAM_1:%.+]]: memref<1xi64>) -> memref { +// CHECK-DAG: [[CST_0:%.+]] = arith.constant 0 : index +// CHECK: [[LOAD_PARAM_1_MEM:%.+]] = krnl.load [[PARAM_1]]{{\[}}[[CST_0]]{{\]}} : memref<1xi64> +// CHECK: [[VAR_1:%.+]] = arith.index_cast [[LOAD_PARAM_1_MEM]] : i64 to index +// CHECK: [[VAR_2:%.+]] = affine.apply [[MAP0]](){{.}}[[VAR_1]]{{.}} +// CHECK-DAG: [[RES:%.+]] = memref.alloc([[VAR_2]]) {{.*}} : memref +// CHECK-DAG: [[LOOP_0:%.+]] = krnl.define_loops 1 +// CHECK: krnl.iterate([[LOOP_0]]) with ([[LOOP_0]] -> [[I_0:%.+]] = 0 to [[MAP0]](){{.}}[[VAR_1]]{{.}}){ +// CHECK-NEXT: [[IV:%.+]] = krnl.get_induction_var_value([[LOOP_0]]) : (!krnl.loop) -> index +// CHECK: [[VAR_5:%.+]] = affine.apply [[MAP1]]([[IV]]) +// CHECK: [[LOAD_PARAM_0_MEM:%.+]] = krnl.load [[PARAM_0]]{{.}}[[VAR_5]]{{.}} : memref<8xf32> +// CHECK: krnl.store [[LOAD_PARAM_0_MEM]], [[RES]]{{.}}[[IV]]{{.}} : memref // CHECK: } -// CHECK: return [[RES_]] : memref +// CHECK: return [[RES]] : memref // CHECK: } } @@ -476,6 +482,8 @@ func @test_split_unknown_dimension(%arg0 : tensor) -> (tensor<*xf32> %0, %1 = "onnx.Split"(%arg0, %split) { axis = 1 : si64} : (tensor, tensor<2xi64>) -> (tensor<*xf32>, 
tensor<*xf32>) "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () +// CHECK: [[MAP0:#.+]] = affine_map<(d0) -> (d0)> +// CHECK: [[MAP1:#.+]] = affine_map<(d0) -> (d0 + 2)> // CHECK-LABEL: func @test_split_unknown_dimension // CHECK-SAME: ([[PARAM_0_:%.+]]: memref) -> (memref, memref) { // CHECK: [[CST_0_:%.+]] = arith.constant 0 : index @@ -484,16 +492,18 @@ func @test_split_unknown_dimension(%arg0 : tensor) -> (tensor<*xf32> // CHECK-NOT: separator of consecutive DAGs // CHECK-DAG: [[RES_:%.+]] = memref.alloc([[DIM_0_]]) {{.*}} : memref // CHECK-DAG: [[RES_1_:%.+]] = memref.alloc([[DIM_1_]]) {{.*}} : memref -// CHECK-DAG: [[LOOP_0_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to [[DIM_0_]], [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to 2, [[LOOP_0_]]#2 -> [[I_2_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: } -// CHECK: [[LOOP_1_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2) with ([[LOOP_1_]]#0 -> [[I_3_:%.+]] = 0 to [[DIM_1_]], [[LOOP_1_]]#1 -> [[I_4_:%.+]] = 0 to 30, [[LOOP_1_]]#2 -> [[I_5_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply #map([[I_4_]]) -// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_3_]], [[LOAD_PARAM_0_MEM_1_]], [[I_5_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[I_3_]], [[I_4_]], [[I_5_]]{{.}} : memref +// CHECK-DAG: [[LOOP_0:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) with ([[LOOP_0]]#0 -> %arg1 = 0 to [[MAP0]]([[DIM_0_]]), [[LOOP_0]]#1 -> %arg2 = 0 to 2, [[LOOP_0]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: } +// CHECK: [[LOOP_1:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) with ([[LOOP_1]]#0 -> %arg1 = 0 to [[MAP0]]([[DIM_1_]]), [[LOOP_1]]#1 -> %arg2 = 0 to 30, [[LOOP_1]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply [[MAP1]]{{.}}[[IV]]#1{{.}} +// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[LOAD_PARAM_0_MEM_1_]], [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref // CHECK: } // CHECK: return [[RES_]], [[RES_1_]] : memref, memref // CHECK: } @@ -507,7 +517,11 @@ func @test_split_unknown_dimension_equal_split(%arg0 : tensor) -> (t %0, %1 = "onnx.Split"(%arg0, %cst) { axis = 1 : si64 } : (tensor, none) -> (tensor<*xf32>, tensor<*xf32>) "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () -// CHECK-LABEL: func @test_split_unknown_dimension_equal_split +// CHECK: [[MAP0:#.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK: [[MAP1:#.+]] = affine_map<(d0) -> (d0)> +// CHECK: [[MAP2:#.+]] = 
affine_map<(d0, d1) -> (d1)> +// CHECK: [[MAP3:#.+]] = affine_map<(d0)[s0] -> (d0 + s0 ceildiv 2)> +// CHECK-LABEL: func @test_split_unknown_dimension_equal_split // CHECK-SAME: ([[PARAM_0_:%.+]]: memref) -> (memref, memref) { // CHECK-DAG: [[CST_1_:%.+]] = arith.constant 1 : index // CHECK-DAG: [[CST_0_:%.+]] = arith.constant 0 : index @@ -520,16 +534,18 @@ func @test_split_unknown_dimension_equal_split(%arg0 : tensor) -> (t // CHECK-NOT: separator of consecutive DAGs // CHECK-DAG: [[RES_:%.+]] = memref.alloc([[DIM_1_]], [[VAR_3_]]) {{.*}} : memref // CHECK-DAG: [[RES_1_:%.+]] = memref.alloc([[DIM_2_]], [[VAR_5_]]) {{.*}} : memref -// CHECK-DAG: [[LOOP_0_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to [[DIM_1_]], [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to [[VAR_3_]], [[LOOP_0_]]#2 -> [[I_2_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: } -// CHECK: [[LOOP_1_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2) with ([[LOOP_1_]]#0 -> [[I_3_:%.+]] = 0 to [[DIM_2_]], [[LOOP_1_]]#1 -> [[I_4_:%.+]] = 0 to [[VAR_5_]], [[LOOP_1_]]#2 -> [[I_5_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply #map1([[I_4_]]){{.}}[[DIM_0_]]{{.}} -// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_3_]], [[LOAD_PARAM_0_MEM_1_]], [[I_5_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[I_3_]], [[I_4_]], [[I_5_]]{{.}} : memref +// CHECK-DAG: [[LOOP_0:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) with ([[LOOP_0]]#0 -> %arg1 = 0 to [[MAP1]]([[DIM_1_]]), [[LOOP_0]]#1 -> %arg2 = 0 to [[MAP2]]([[DIM_1_]], [[VAR_3_]]), [[LOOP_0]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: } +// CHECK: [[LOOP_1:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) with ([[LOOP_1]]#0 -> %arg1 = 0 to [[MAP1]]([[DIM_2_]]), [[LOOP_1]]#1 -> %arg2 = 0 to [[MAP2]]([[DIM_2_]], [[VAR_5_]]), [[LOOP_1]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply [[MAP3]]([[IV]]#1){{.}}[[DIM_0_]]{{.}} +// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[LOAD_PARAM_0_MEM_1_]], [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref // CHECK: } // CHECK: return [[RES_]], [[RES_1_]] : memref, memref // CHECK: } @@ -542,6 +558,8 @@ func @test_splitv11_unknown_dimension(%arg0 : tensor) -> (tensor<*xf %0, %1 = "onnx.SplitV11"(%arg0) { axis = 1 : si64, split = [2, 30]} : (tensor) -> (tensor<*xf32>, tensor<*xf32>) "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () +// CHECK: [[MAP0:#.+]] = affine_map<(d0) -> (d0)> +// CHECK: [[MAP1:#.+]] = 
affine_map<(d0) -> (d0 + 2)> // CHECK-LABEL: func @test_splitv11_unknown_dimension // CHECK-SAME: ([[PARAM_0_:%.+]]: memref) -> (memref, memref) { // CHECK: [[CST_0_:%.+]] = arith.constant 0 : index @@ -550,16 +568,18 @@ func @test_splitv11_unknown_dimension(%arg0 : tensor) -> (tensor<*xf // CHECK-NOT: separator of consecutive DAGs // CHECK-DAG: [[RES_:%.+]] = memref.alloc([[DIM_0_]]) {{.*}} : memref // CHECK-DAG: [[RES_1_:%.+]] = memref.alloc([[DIM_1_]]) {{.*}} : memref -// CHECK-DAG: [[LOOP_0_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to [[DIM_0_]], [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to 2, [[LOOP_0_]]#2 -> [[I_2_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: } -// CHECK: [[LOOP_1_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2) with ([[LOOP_1_]]#0 -> [[I_3_:%.+]] = 0 to [[DIM_1_]], [[LOOP_1_]]#1 -> [[I_4_:%.+]] = 0 to 30, [[LOOP_1_]]#2 -> [[I_5_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply #map([[I_4_]]) -// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_3_]], [[LOAD_PARAM_0_MEM_1_]], [[I_5_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[I_3_]], [[I_4_]], [[I_5_]]{{.}} : memref +// CHECK-DAG: [[LOOP_0:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) with ([[LOOP_0]]#0 -> %arg1 = 0 to [[MAP0]]([[DIM_0_]]), [[LOOP_0]]#1 -> %arg2 = 0 to 2, [[LOOP_0]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: } +// CHECK: [[LOOP_1:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) with ([[LOOP_1]]#0 -> %arg1 = 0 to [[MAP0]]([[DIM_1_]]), [[LOOP_1]]#1 -> %arg2 = 0 to 30, [[LOOP_1]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply [[MAP1]]([[IV]]#1) +// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[LOAD_PARAM_0_MEM_1_]], [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref // CHECK: } // CHECK: return [[RES_]], [[RES_1_]] : memref, memref // CHECK: } @@ -572,7 +592,11 @@ func @test_splitv11_unknown_dimension_equal_split(%arg0 : tensor) -> %0, %1 = "onnx.SplitV11"(%arg0) { axis = 1 : si64 } : (tensor) -> (tensor<*xf32>, tensor<*xf32>) "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () -// CHECK-LABEL: func @test_splitv11_unknown_dimension_equal_split +// CHECK: [[MAP0:#.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK: [[MAP1:#.+]] = affine_map<(d0) -> (d0)> +// CHECK: [[MAP2:#.+]] = affine_map<(d0, d1) -> (d1)> +// CHECK: [[MAP3:#.+]] = affine_map<(d0)[s0] -> (d0 + s0 ceildiv 2)> +// CHECK-LABEL: func @test_splitv11_unknown_dimension_equal_split // 
CHECK-SAME: ([[PARAM_0_:%.+]]: memref) -> (memref, memref) { // CHECK-DAG: [[CST_1_:%.+]] = arith.constant 1 : index // CHECK-DAG: [[CST_0_:%.+]] = arith.constant 0 : index @@ -585,16 +609,18 @@ func @test_splitv11_unknown_dimension_equal_split(%arg0 : tensor) -> // CHECK-NOT: separator of consecutive DAGs // CHECK-DAG: [[RES_:%.+]] = memref.alloc([[DIM_1_]], [[VAR_3_]]) {{.*}} : memref // CHECK-DAG: [[RES_1_:%.+]] = memref.alloc([[DIM_2_]], [[VAR_5_]]) {{.*}} : memref -// CHECK-DAG: [[LOOP_0_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to [[DIM_1_]], [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to [[VAR_3_]], [[LOOP_0_]]#2 -> [[I_2_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[I_0_]], [[I_1_]], [[I_2_]]{{.}} : memref -// CHECK: } -// CHECK: [[LOOP_1_:%.+]]:3 = krnl.define_loops 3 -// CHECK: krnl.iterate([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2) with ([[LOOP_1_]]#0 -> [[I_3_:%.+]] = 0 to [[DIM_2_]], [[LOOP_1_]]#1 -> [[I_4_:%.+]] = 0 to [[VAR_5_]], [[LOOP_1_]]#2 -> [[I_5_:%.+]] = 0 to 64){ -// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply #map1([[I_4_]]){{.}}[[DIM_0_]]{{.}} -// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[I_3_]], [[LOAD_PARAM_0_MEM_1_]], [[I_5_]]{{.}} : memref -// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[I_3_]], [[I_4_]], [[I_5_]]{{.}} : memref +// CHECK-DAG: [[LOOP_0:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) with ([[LOOP_0]]#0 -> %arg1 = 0 to [[MAP1]]([[DIM_1_]]), [[LOOP_0]]#1 -> %arg2 = 0 to [[MAP2]]([[DIM_1_]], [[VAR_3_]]), [[LOOP_0]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_0]]#0, [[LOOP_0]]#1, [[LOOP_0]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref +// CHECK: } +// CHECK: [[LOOP_1:%.+]]:3 = krnl.define_loops 3 +// CHECK: krnl.iterate([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) with ([[LOOP_1]]#0 -> %arg1 = 0 to [[MAP1]]([[DIM_2_]]), [[LOOP_1]]#1 -> %arg2 = 0 to [[MAP2]]([[DIM_2_]], [[VAR_5_]]), [[LOOP_1]]#2 -> %arg3 = 0 to 64){ +// CHECK: [[IV:%.+]]:3 = krnl.get_induction_var_value([[LOOP_1]]#0, [[LOOP_1]]#1, [[LOOP_1]]#2) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index) +// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = affine.apply [[MAP3]]([[IV]]#1){{.}}[[DIM_0_]]{{.}} +// CHECK: [[LOAD_PARAM_0_MEM_2_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[IV]]#0, [[LOAD_PARAM_0_MEM_1_]], [[IV]]#2{{.}} : memref +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_2_]], [[RES_1_]]{{.}}[[IV]]#0, [[IV]]#1, [[IV]]#2{{.}} : memref // CHECK: } // CHECK: return [[RES_]], [[RES_1_]] : memref, memref // CHECK: }