Skip to content

Commit

Permalink
speed up profiling by removing initialization
Browse files: browse the repository at this point in the history
  • Loading branch information
masahi committed Dec 12, 2021
1 parent 93cd039 commit a447b57
Showing 1 changed file with 7 additions and 35 deletions.
42 changes: 7 additions & 35 deletions python/tvm/contrib/cutlass/conv2d_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,7 @@ def __init__(self):
auto dilated_w = (filter_size.w() - 1) * dilation.column() + 1;
auto h = (input_size.h() + padding.n() + padding.h() - dilated_h) / conv_stride.row() + 1;
auto w = (input_size.w() + padding.w() + padding.c() - dilated_w) / conv_stride.column() + 1;
- return cutlass::Tensor4DCoord(
- input_size.n(),
- h, w,
- filter_size.n());
+ return cutlass::Tensor4DCoord(input_size.n(), h, w, filter_size.n());
}
};
Expand All @@ -98,31 +95,6 @@ def __init__(self):
cutlass::HostTensor<ElementOutput, typename ImplicitGemm::LayoutC> tensor_c(oshape);
cutlass::HostTensor<ElementOutput, typename ImplicitGemm::LayoutC> tensor_ref_c(oshape);
- cutlass::reference::host::TensorFillRandomUniform(
- tensor_a.host_view(),
- 1,
- ElementInputA(7),
- ElementInputA(-8),
- 0);
- cutlass::reference::host::TensorFillRandomUniform(
- tensor_b.host_view(),
- 1,
- ElementInputB(7),
- ElementInputB(-8),
- 0);
- cutlass::reference::host::TensorFill(
- tensor_c.host_view());
- cutlass::reference::host::TensorFill(
- tensor_ref_c.host_view());
- tensor_a.sync_device();
- tensor_b.sync_device();
- tensor_c.sync_device();
- tensor_ref_c.sync_device();
cutlass::conv::Conv2dProblemSize problem_size(
options.input_size,
options.filter_size,
Expand All @@ -137,12 +109,12 @@ def __init__(self):
using ElementComputeEpilogue = typename ImplicitGemm::ElementCompute;
typename ImplicitGemm::Arguments arguments{
problem_size,
- tensor_a.device_ref(),
- tensor_b.device_ref(),
- tensor_c.device_ref(),
- tensor_c.device_ref(),
- {ElementComputeEpilogue(1), ElementComputeEpilogue(0)},
- };
+ tensor_a.device_ref(),
+ tensor_b.device_ref(),
+ tensor_c.device_ref(),
+ tensor_c.device_ref(),
+ {ElementComputeEpilogue(1), ElementComputeEpilogue(0)},
+ };
ImplicitGemm implicit_gemm_op;
size_t workspace_size = implicit_gemm_op.get_workspace_size(arguments);
Expand Down

0 comments on commit a447b57

Please sign in to comment.