Description
backward_data_fp16.cu(193): error: more than one instance of constructor "cutlass::Tensor4DCoord::Tensor4DCoord" matches the argument list:
function "cutlass::Tensor4DCoord::Tensor4DCoord(cutlass::Tensor4DCoord::Index, cutlass::Tensor4DCoord::Index, cutlass::Tensor4DCoord::Index, cutlass::Tensor4DCoord::Index)"
function "cutlass::Tensor4DCoord::Tensor4DCoord(cutlass::Tensor4DCoord::LongIndex, cutlass::Tensor4DCoord::LongIndex, cutlass::Tensor4DCoord::LongIndex, cutlass::Tensor4DCoord::LongIndex)"
argument types are: (int64_t, int64_t, int64_t, int)
backward_data_fp16.cu(215): error: no instance of constructor "cutlass::conv::kernel::ImplicitBatchedGemmTnDepthwiseConvolution<Mma_, Epilogue_, ThreadblockSwizzle_, ConvOperator, ConvProblemSize_>::Arguments::Arguments [with Mma_=cutlass::conv::threadblock::MmaTnPrecompPipelined<ThreadblockShape, cutlass::conv::threadblock::Dwconv2dTileIterator<cutlass::MatrixShape<64, 32>, ElementSrc, cutlass::layout::TensorNCHW, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::layout::PitchLinearShape<32, 64>, 256, cutlass::layout::PitchLinearShape<4, 8>, 8>, 1, 0>, cutlass::transform::threadblock::RegularTileIterator<cutlass::MatrixShape<64, 32>, ElementSrc, cutlass::layout::RowMajorVoltaTensorOpMultiplicandCrosswise<16, 32>, 0, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::layout::PitchLinearShape<32, 64>, 256, cutlass::layout::PitchLinearShape<4, 8>, 8>, 16>, cutlass::conv::threadblock::Dwconv2dTileFilterIteratorDgradPrecomp<cutlass::MatrixShape<32, 128>, ElementFilter, cutlass::layout::TensorNCHW, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::layout::PitchLinearShape<128, 32>, 256, cutlass::layout::PitchLinearShape<8, 4>, 8>, 1>, cutlass::transform::threadblock::RegularTileIterator<cutlass::MatrixShape<32, 128>, ElementFilter, cutlass::layout::RowMajorVoltaTensorOpMultiplicandBCongruous<16>, 0, cutlass::transform::PitchLinearWarpRakedThreadMap<cutlass::layout::PitchLinearShape<128, 32>, 256, cutlass::layout::PitchLinearShape<8, 4>, 8>, 16>, ElementAccumulator, LayoutDst, cutlass::gemm::threadblock::MmaPolicy<cutlass::gemm::warp::MmaVoltaTensorOp<WarpShape, ElementSrc, cutlass::layout::RowMajorVoltaTensorOpMultiplicandCrosswise<16, 32>, ElementFilter, cutlass::layout::RowMajorVoltaTensorOpMultiplicandBCongruous<16>, ElementAccumulator, cutlass::layout::RowMajor, cutlass::gemm::warp::MmaTensorOpPolicy<cutlass::arch::Mma<cutlass::gemm::GemmShape<16, 16, 4>, 32, ElementSrc, cutlass::layout::RowMajor, ElementFilter, cutlass::layout::RowMajor, 
ElementAccumulator, cutlass::layout::RowMajor, cutlass::arch::OpMultiplyAdd>, cutlass::MatrixShape<1, 1>>, __nv_bool>, cutlass::MatrixShape<0, 0>, cutlass::MatrixShape<0, 0>, 1>, cutlass::NumericArrayConverter<ElementSrc, ElementSrc, 8, cutlass::FloatRoundStyle::round_to_nearest>, cutlass::NumericArrayConverter<ElementFilter, ElementFilter, 16, cutlass::FloatRoundStyle::round_to_nearest>, __nv_bool>, Epilogue_=cutlass::epilogue::threadblock::ConvolutionEpilogue<ThreadblockShape, cutlass::layout::TensorNCHW, 1, cutlass::gemm::warp::MmaVoltaTensorOp<WarpShape, ElementSrc, cutlass::layout::RowMajorVoltaTensorOpMultiplicandCrosswise<16, 32>, ElementFilter, cutlass::layout::RowMajorVoltaTensorOpMultiplicandBCongruous<16>, ElementAccumulator, cutlass::layout::RowMajor, cutlass::gemm::warp::MmaTensorOpPolicy<cutlass::arch::Mma<cutlass::gemm::GemmShape<16, 16, 4>, 32, ElementSrc, cutlass::layout::RowMajor, ElementFilter, cutlass::layout::RowMajor, ElementAccumulator, cutlass::layout::RowMajor, cutlass::arch::OpMultiplyAdd>, cutlass::MatrixShape<1, 1>>, __nv_bool>, cutlass::epilogue::threadblock::Dwconv2dPredicatedTileIterator<cutlass::epilogue::threadblock::OutputTileOptimalThreadMap<cutlass::epilogue::threadblock::OutputTileShape<128, 4, 4, 2, 1>, cutlass::epilogue::threadblock::OutputTileShape<1, 2, 1, 1, 2>, 256, 1, 16>, cutlass::layout::TensorNCHW, ElementDst>, cutlass::epilogue::warp::FragmentIteratorVoltaTensorOp<WarpShape, cutlass::gemm::GemmShape<32, 32, 4>, ElementAccumulator, cutlass::layout::RowMajor>, cutlass::epilogue::warp::TileIteratorVoltaTensorOp<WarpShape, cutlass::gemm::GemmShape<32, 32, 4>, ElementAccumulator, cutlass::layout::RowMajor>, cutlass::epilogue::threadblock::SharedLoadIterator<cutlass::epilogue::threadblock::OutputTileOptimalThreadMap<cutlass::epilogue::threadblock::OutputTileShape<128, 4, 4, 2, 1>, cutlass::epilogue::threadblock::OutputTileShape<1, 2, 1, 1, 2>, 256, 1, 16>::CompactedThreadMap, ElementAccumulator, 4>, 
cutlass::epilogue::threadblock::Dwconv2dBiasTileIterator<cutlass::layout::TensorNCHW, ElementDst, 1>, EpilogueOp, cutlass::MatrixShape<0, 2>, false>, ThreadblockSwizzle_=SwizzleThreadBlock, ConvOperator=cutlass::conv::Operator::kDgrad, ConvProblemSize_=cutlass::conv::Conv2dProblemSize]" matches the argument list
argument types are: ({...}, cutlass::TensorRef<ElementSrc, LayoutSrc>, cutlass::TensorRef<ElementSrc, LayoutSrc>, long, long, cutlass::TensorRef<ElementSrc, LayoutSrc>, {...})
3 errors detected in the compilation of "backward_data_fp16.cu".
error: command '/usr/local/cuda/bin/nvcc' failed with exit code 1