@@ -27,6 +27,91 @@ using namespace mlir;
 using namespace mlir::torch;
 using namespace mlir::torch::Torch;

+// Computes maxpool2d for AtenMaxPool2dOp and AtenMaxPool2dWithIndicesOp.
+template <typename OpTy>
+static LogicalResult computeMaxPool2d(OpTy op,
+                                      ConversionPatternRewriter &rewriter,
+                                      Value self, Value &result) {
+  Location loc = op.getLoc();
+  Value ceilMode = op.ceil_mode();
+  Type elementType = self.getType().cast<RankedTensorType>().getElementType();
+  if (!elementType.isa<mlir::FloatType>())
+    return op.emitError("unimplemented: non-floating point type");
+
+  // Pattern match against the op's original operands, because otherwise we
+  // will get the lowered version of the operands which is harder to pattern
+  // match.
+  SmallVector<int64_t, 2> strideInts;
+  if (!matchPattern(op.stride(), m_TorchConstantIntList(strideInts)))
+    return rewriter.notifyMatchFailure(op, "only support constant int strides");
+  SmallVector<int64_t, 2> dilationInts;
+  if (!matchPattern(op.dilation(), m_TorchConstantIntList(dilationInts)))
+    return rewriter.notifyMatchFailure(op,
+                                       "only support constant int dilations");
+  SmallVector<int64_t, 2> paddingInts;
+  if (!matchPattern(op.padding(), m_TorchConstantIntList(paddingInts)))
+    return rewriter.notifyMatchFailure(op,
+                                       "only support constant int paddings");
+  SmallVector<int64_t, 2> kernelSizeInts;
+  if (!matchPattern(op.kernel_size(), m_TorchConstantIntList(kernelSizeInts)))
+    return rewriter.notifyMatchFailure(op, "only support kernel size ints");
+  bool ceilModeFalse = false;
+  if (!matchPattern(op.ceil_mode(), m_TorchConstantBool(&ceilModeFalse)))
+    return rewriter.notifyMatchFailure(op, "only ceil_mode false is supported");
+
+  SmallVector<int64_t, 4> paddingIncludingNC = {0, 0};
+  paddingIncludingNC.insert(paddingIncludingNC.end(), paddingInts.begin(),
+                            paddingInts.end());
+  Value paddedInput =
+      torch_to_linalg::getPaddedTensor(op, rewriter, self, paddingIncludingNC);
+
+  Value N = getDimOp(rewriter, loc, self, 0);
+  Value C = getDimOp(rewriter, loc, self, 1);
+  Value H = getDimOp(rewriter, loc, self, 2);
+  Value W = getDimOp(rewriter, loc, self, 3);
+
+  SmallVector<Value> paddingIntValues =
+      getAsConstantIntValues(rewriter, loc, paddingInts);
+  SmallVector<Value> dilationIntValues =
+      getAsConstantIntValues(rewriter, loc, dilationInts);
+  SmallVector<Value> kernelSizeIntValues =
+      getAsConstantIntValues(rewriter, loc, kernelSizeInts);
+  SmallVector<Value> strideIntValues =
+      getAsConstantIntValues(rewriter, loc, strideInts);
+
+  Value Hout = torch_to_linalg::getOutputDimForConvOps(
+      rewriter, loc, H, paddingIntValues[0], dilationIntValues[0],
+      kernelSizeIntValues[0], strideIntValues[0]);
+  Value Wout = torch_to_linalg::getOutputDimForConvOps(
+      rewriter, loc, W, paddingIntValues[1], dilationIntValues[1],
+      kernelSizeIntValues[1], strideIntValues[1]);
+
+  // Initialize output tensor with smallest floating point value
+  Value outTensor = rewriter.create<linalg::InitTensorOp>(
+      loc, ValueRange{N, C, Hout, Wout}, elementType);
+  auto initialAttr = rewriter.getFloatAttr(
+      elementType, APFloat::getSmallest(
+                       elementType.cast<mlir::FloatType>().getFloatSemantics(),
+                       /*Negative=*/true));
+  Value initValue = rewriter.create<arith::ConstantOp>(loc, initialAttr);
+  Value outTensorInitialized =
+      rewriter.create<linalg::FillOp>(loc, initValue, outTensor).getResult(0);
+
+  auto stridesAttr = rewriter.getI64VectorAttr(strideInts);
+  auto dilationAttr = rewriter.getI64VectorAttr(dilationInts);
+  Value windowTensor = rewriter.create<linalg::InitTensorOp>(
+      loc, getAsConstantIndexValues(rewriter, loc, kernelSizeInts),
+      elementType);
+
+  result = rewriter
+               .create<linalg::PoolingNchwMaxOp>(
+                   loc, outTensorInitialized.getType(),
+                   ValueRange{paddedInput, windowTensor}, outTensorInitialized,
+                   stridesAttr, dilationAttr)
+               .getResult(0);
+  return success();
+}
+
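For reference, getOutputDimForConvOps is expected to build IR for the usual pooling output-size computation, and since ceil_mode is constrained to false above, the floor form of the formula applies. A minimal scalar sketch of that expectation, with a hypothetical helper name that is not part of the patch:

// Hypothetical helper (not in the patch) mirroring the Hout/Wout computation
// above, assuming ceil_mode == false:
//   out = floor((in + 2 * padding - dilation * (kernelSize - 1) - 1) / stride) + 1
static int64_t expectedPoolOutSize(int64_t in, int64_t padding,
                                   int64_t dilation, int64_t kernelSize,
                                   int64_t stride) {
  return (in + 2 * padding - dilation * (kernelSize - 1) - 1) / stride + 1;
}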
 namespace {
 class ConvertAtenMaxPool2dOp : public OpConversionPattern<AtenMaxPool2dOp> {
 public:
@@ -36,94 +121,117 @@ class ConvertAtenMaxPool2dOp : public OpConversionPattern<AtenMaxPool2dOp> {
                   ConversionPatternRewriter &rewriter) const override {
     if (failed(verifyLinalgCompatibleTypes(op, rewriter)))
       return failure();
+
+    Value self = adaptor.self();
+    Value maxPool2d;
+    if (failed(
+            computeMaxPool2d<AtenMaxPool2dOp>(op, rewriter, self, maxPool2d)))
+      return rewriter.notifyMatchFailure(op, "unable to compute maxpool2d");
+    Type newResultType = getTypeConverter()->convertType(op.getType());
+    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, maxPool2d);
+    return success();
+  }
+};
+} // namespace
+
+namespace {
+// Returns the result of maxpool2d over the input tensor, along with the
+// corresponding indices into the input tensor for the values of the result
+// tensor.
+//
+// The result of the maxpool2d operation is calculated using the helper
+// function written above. For finding the indices, we follow the method
+// below:
+//
+// Let's say the input tensor is a 4-d tensor. The maxpool2d and indices will
+// also be 4-d tensors. Then:
+// for i in input.size[0]:
+//   for j in input.size[1]:
+//     for k in input.size[2]:
+//       for l in input.size[3]:
+//         for p in maxpool2d.size[1]:
+//           for q in maxpool2d.size[2]:
+//             for r in maxpool2d.size[3]:
+//               if input[i, j, k, l] == maxpool2d[i, p, q, r]:
+//                 indices[i, p, q, r] = (k * input.size[3] + l)
+//
+class ConvertAtenMaxPool2dWithIndicesOp
+    : public OpConversionPattern<AtenMaxPool2dWithIndicesOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(AtenMaxPool2dWithIndicesOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (failed(verifyLinalgCompatibleTypes(op, rewriter)))
+      return failure();
     Location loc = op->getLoc();
     Value self = adaptor.self();
-    Value ceilMode = adaptor.ceil_mode();

-    Type elementType = self.getType().cast<RankedTensorType>().getElementType();
-    if (!elementType.isa<mlir::FloatType>())
-      return op.emitError("unimplemented: non-floating point type");
+    // Contains the result of maxpool2d operation over the input.
+    Value maxPool2d;
+    if (failed(computeMaxPool2d<AtenMaxPool2dWithIndicesOp>(op, rewriter, self,
+                                                            maxPool2d)))
+      return rewriter.notifyMatchFailure(op, "unable to compute maxpool2d");

-    // Pattern match against the op's original operands, because otherwise we
-    // will get the lowered version of the operands which is harder to pattern
-    // match.
-    SmallVector<int64_t, 2> strideInts;
-    if (!matchPattern(op.stride(), m_TorchConstantIntList(strideInts)))
-      return rewriter.notifyMatchFailure(op,
-                                         "only support constant int strides");
-    SmallVector<int64_t, 2> dilationInts;
-    if (!matchPattern(op.dilation(), m_TorchConstantIntList(dilationInts)))
-      return rewriter.notifyMatchFailure(op,
-                                         "only support constant int dilations");
-    SmallVector<int64_t, 2> paddingInts;
-    if (!matchPattern(op.padding(), m_TorchConstantIntList(paddingInts)))
-      return rewriter.notifyMatchFailure(op,
-                                         "only support constant int paddings");
-    SmallVector<int64_t, 2> kernelSizeInts;
-    if (!matchPattern(op.kernel_size(), m_TorchConstantIntList(kernelSizeInts)))
-      return rewriter.notifyMatchFailure(op, "only support kernel size ints");
-
-    Value falseValue = rewriter.create<arith::ConstantOp>(
-        loc, IntegerAttr::get(rewriter.getIntegerType(1), 0));
-    Value ceilModeFalse = rewriter.create<arith::CmpIOp>(
-        loc, arith::CmpIPredicate::eq, ceilMode, falseValue);
-    rewriter.create<cf::AssertOp>(
-        loc, ceilModeFalse,
-        rewriter.getStringAttr("only ceil_mode false is supported"));
-
-    SmallVector<int64_t, 4> paddingIncludingNC = {0, 0};
-    paddingIncludingNC.insert(paddingIncludingNC.end(), paddingInts.begin(),
-                              paddingInts.end());
-    Value paddedInput = torch_to_linalg::getPaddedTensor(op, rewriter, self,
-                                                         paddingIncludingNC);
-
-    Value N = getDimOp(rewriter, loc, self, 0);
-    Value C = getDimOp(rewriter, loc, self, 1);
-    Value H = getDimOp(rewriter, loc, self, 2);
-    Value W = getDimOp(rewriter, loc, self, 3);
-
-    SmallVector<Value> paddingIntValues =
-        getAsConstantIntValues(rewriter, loc, paddingInts);
-    SmallVector<Value> dilationIntValues =
-        getAsConstantIntValues(rewriter, loc, dilationInts);
-    SmallVector<Value> kernelSizeIntValues =
-        getAsConstantIntValues(rewriter, loc, kernelSizeInts);
-    SmallVector<Value> strideIntValues =
-        getAsConstantIntValues(rewriter, loc, strideInts);
-
-    Value Hout = torch_to_linalg::getOutputDimForConvOps(
-        rewriter, loc, H, paddingIntValues[0], dilationIntValues[0],
-        kernelSizeIntValues[0], strideIntValues[0]);
-    Value Wout = torch_to_linalg::getOutputDimForConvOps(
-        rewriter, loc, W, paddingIntValues[1], dilationIntValues[1],
-        kernelSizeIntValues[1], strideIntValues[1]);
-
-    // Initialize output tensor with smallest floating point value
-    Value outTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, ValueRange{N, C, Hout, Wout}, elementType);
-    auto initialAttr = rewriter.getFloatAttr(
-        elementType,
-        APFloat::getSmallest(
-            elementType.cast<mlir::FloatType>().getFloatSemantics(),
-            /*Negative=*/true));
-    Value initValue = rewriter.create<arith::ConstantOp>(loc, initialAttr);
-    Value outTensorInitialized =
-        rewriter.create<linalg::FillOp>(loc, initValue, outTensor).getResult(0);
-
-    auto stridesAttr = rewriter.getI64VectorAttr(strideInts);
-    auto dilationAttr = rewriter.getI64VectorAttr(dilationInts);
-    Value windowTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, getAsConstantIndexValues(rewriter, loc, kernelSizeInts),
-        elementType);
-
-    Value maxPool2d = rewriter
-                          .create<linalg::PoolingNchwMaxOp>(
-                              loc, outTensorInitialized.getType(),
-                              ValueRange{paddedInput, windowTensor},
-                              outTensorInitialized, stridesAttr, dilationAttr)
-                          .getResult(0);
-    Type newResultType = getTypeConverter()->convertType(op.getType());
-    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, maxPool2d);
+    RankedTensorType resultType = getTypeConverter()
+                                      ->convertType(op->getResult(0).getType())
+                                      .cast<RankedTensorType>();
+    RankedTensorType indicesType = getTypeConverter()
+                                       ->convertType(op->getResult(1).getType())
+                                       .cast<RankedTensorType>();
+    unsigned resultRank = resultType.getRank();
+    SmallVector<Value> inputShape(getTensorSizes(rewriter, loc, self));
+    SmallVector<Value> resultShape(getTensorSizes(rewriter, loc, maxPool2d));
+
+    Value indicesTensor = createZeroInitTensor(rewriter, loc, resultShape,
+                                               indicesType.getElementType());
+
+    SmallVector<AffineExpr> inputExprs, maxPoolExprs, indicesExprs;
+    SmallVector<StringRef> iteratorTypes(2 * resultRank - 1,
+                                         getParallelIteratorTypeName());
+    AffineExpr zeroDimExpr = rewriter.getAffineDimExpr(0);
+    inputExprs.push_back(zeroDimExpr);
+    maxPoolExprs.push_back(zeroDimExpr);
+    indicesExprs.push_back(zeroDimExpr);
+
+    for (unsigned i = 1; i < resultRank; i++) {
+      inputExprs.push_back(rewriter.getAffineDimExpr(i));
+      maxPoolExprs.push_back(rewriter.getAffineDimExpr(i + resultRank - 1));
+      indicesExprs.push_back(rewriter.getAffineDimExpr(i + resultRank - 1));
+    }
+
+    auto indexingMaps =
+        AffineMap::inferFromExprList({inputExprs, maxPoolExprs, indicesExprs});
+
+    auto indicesResult =
+        rewriter
+            .create<linalg::GenericOp>(
+                loc, /*resultTensorTypes=*/indicesTensor.getType(),
+                /*inputs=*/ValueRange({self, maxPool2d}),
+                /*outputs=*/indicesTensor,
+                /*indexingMaps=*/indexingMaps,
+                /*iteratorTypes=*/iteratorTypes,
+                [&](OpBuilder &b, Location loc, ValueRange args) {
+                  Value out = args[2];
+                  Value index = b.create<linalg::IndexOp>(loc, resultRank - 2);
+                  index = b.create<arith::MulIOp>(
+                      loc, index, inputShape[resultRank - 2 + 1]);
+                  index = b.create<arith::AddIOp>(
+                      loc, index,
+                      b.create<linalg::IndexOp>(loc, resultRank - 1));
+                  index = castIndexToInt(b, loc, index);
+                  Value predicate;
+                  if (resultType.getElementType().isa<mlir::FloatType>())
+                    predicate = b.create<arith::CmpFOp>(
+                        loc, arith::CmpFPredicate::OEQ, args[0], args[1]);
+                  else
+                    predicate = b.create<arith::CmpIOp>(
+                        loc, arith::CmpIPredicate::eq, args[0], args[1]);
+
+                  Value result =
+                      b.create<arith::SelectOp>(loc, predicate, index, out);
+                  b.create<linalg::YieldOp>(loc, result);
+                })
+            .getResult(0);
+    rewriter.replaceOp(op, {maxPool2d, indicesResult});
     return success();
   }
 };
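As a concrete reading of the region body above: the stored index is the offset of the matching input element within the flattened H*W plane of the input. A hand-worked sketch under assumed shapes (a 1x1x4x4 input with a 2x2 kernel and stride 2; the values are illustrative, not from the patch):

// Hand-worked sketch (not part of the patch): with input.size = [1, 1, 4, 4],
// if the maximum of the top-left 2x2 window sits at input[0][0][1][1], then
//   indices[0][0][0][0] = k * input.size[3] + l = 1 * 4 + 1 = 5,
// i.e. the position of that element in the flattened 4x4 (H*W) plane.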
@@ -234,6 +342,8 @@ void mlir::torch::torch_to_linalg::populatePoolingPatternsAndLegality(
   MLIRContext *context = patterns.getContext();
   target.addIllegalOp<AtenMaxPool2dOp>();
   patterns.add<ConvertAtenMaxPool2dOp>(typeConverter, context);
+  target.addIllegalOp<AtenMaxPool2dWithIndicesOp>();
+  patterns.add<ConvertAtenMaxPool2dWithIndicesOp>(typeConverter, context);
   target.addIllegalOp<AtenAdaptiveAvgPool2dOp>();
   patterns.add<ConvertAtenAdaptiveAvgPool2dOp>(typeConverter, context);
 }
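For context on how these patterns are consumed: the populate function above only registers patterns and marks the ops illegal; the enclosing TorchToLinalg conversion pass is expected to drive them through the dialect conversion framework. A minimal driver sketch, with the argument order assumed and not taken from this patch:

// Hypothetical driver (not part of this patch): applying the registered
// patterns with a partial conversion, so only the ops marked illegal above
// must be rewritten.
static LogicalResult runPoolingConversion(Operation *op,
                                          TypeConverter &typeConverter) {
  MLIRContext *context = op->getContext();
  ConversionTarget target(*context);
  RewritePatternSet patterns(context);
  // Argument order assumed; in-tree, the TorchToLinalg pass supplies its own
  // converter, target, and pattern set.
  torch_to_linalg::populatePoolingPatternsAndLegality(typeConverter, patterns,
                                                      target);
  return applyPartialConversion(op, target, std::move(patterns));
}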