simplify conv dnnlowp ops by not allowing fp32 in/out (pytorch#15758)
Summary:
Pull Request resolved: pytorch#15758

DNNLOWP Conv operators became very complex due to the many options they supported. This diff simplifies them by no longer allowing fp32 inputs/outputs. This is acceptable for Conv operators because they are usually used deep inside networks, where quantizing and dequantizing through separate operators adds little overhead.
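To illustrate the resulting usage pattern: a DNNLOWP Conv now consumes and produces quantized tensors only, and any fp32 boundary is handled by explicit Quantize/Dequantize operators. The sketch below is not taken from this diff; the blob names (`X`, `W`, `b`, `Y`), shapes, and attributes are illustrative, and it assumes a Caffe2 build with the DNNLOWP/FBGEMM operators registered.

```python
# Minimal sketch: explicit Quantize/Dequantize around a quantized-only Conv.
# Blob names and shapes are assumptions, not code from this diff.
import numpy as np

from caffe2.python import core, workspace

net = core.Net("dnnlowp_conv_example")
net.Proto().op.extend([
    # fp32 -> uint8 at the input boundary (a separate operator, not a Conv option)
    core.CreateOperator("Quantize", ["X"], ["X_q"], engine="DNNLOWP"),
    # the Conv itself only sees quantized activations
    core.CreateOperator(
        "Conv", ["X_q", "W", "b"], ["Y_q"],
        kernel=3, pad=1, stride=1, order="NHWC", engine="DNNLOWP_ACC16",
    ),
    # uint8 -> fp32 at the output boundary
    core.CreateOperator("Dequantize", ["Y_q"], ["Y"], engine="DNNLOWP"),
])

workspace.FeedBlob("X", np.random.rand(1, 8, 8, 4).astype(np.float32))  # NHWC
workspace.FeedBlob("W", np.random.rand(4, 3, 3, 4).astype(np.float32))
workspace.FeedBlob("b", np.random.rand(4).astype(np.float32))
workspace.RunNetOnce(net)
print(workspace.FetchBlob("Y").shape)  # (1, 8, 8, 4)
```

In a chain of quantized Convs, the Quantize/Dequantize pair appears only once at each fp32 boundary, so the conversion cost is paid at the ends of the chain rather than per operator.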

Reviewed By: csummersea

Differential Revision: D13587341

fbshipit-source-id: e88c919dae79d1c5b7d787ea539edf5bcb064afc
jspark1105 authored and facebook-github-bot committed Jan 7, 2019
1 parent 49ba2cb commit bc328d0
Showing 10 changed files with 338 additions and 729 deletions.
266 changes: 61 additions & 205 deletions caffe2/quantization/server/conv_dnnlowp_acc16_op.cc

Large diffs are not rendered by default.

12 changes: 3 additions & 9 deletions caffe2/quantization/server/conv_dnnlowp_acc16_op.h
```diff
@@ -18,7 +18,6 @@ class ConvDNNLowPAcc16Op final : public ConvDNNLowPOp<std::uint8_t, ReluFused> {
   using BaseType = ConvDNNLowPOp<std::uint8_t, ReluFused>;
   using BaseType::BIAS;
   using BaseType::col_buffer_;
-  using BaseType::dequantize_output_;
   using BaseType::FILTER;
   using BaseType::in_qparams_;
   using BaseType::INPUT;
@@ -36,15 +35,10 @@ class ConvDNNLowPAcc16Op final : public ConvDNNLowPOp<std::uint8_t, ReluFused> {

   bool GetQuantizationParameters_();

-  template <typename InType>
-  bool RunOnDeviceWithOrderNCHWAndType_();
-  template <typename InType>
-  bool RunOnDeviceWithOrderNHWCAndType_();
-
-  template <typename PackAMatrix, fbgemm::QuantizationGranularity Q_GRAN>
+  template <fbgemm::QuantizationGranularity Q_GRAN>
   void DispatchFBGEMM_(
-      PackAMatrix& packA,
-      const std::uint8_t* col_buffer_quantized_data,
+      fbgemm::PackAWithRowOffset<std::uint8_t, std::int16_t>& packA,
+      const std::uint8_t* col_buffer_data,
       vector<std::int32_t>* Y_int32,
       uint8_t* Y_uint8_data);
```
18 changes: 4 additions & 14 deletions caffe2/quantization/server/conv_dnnlowp_acc16_op_test.py
```diff
@@ -31,8 +31,6 @@ class DNNLowPOpConvAcc16OpTest(hu.HypothesisTestCase):
         output_channels_per_group=st.integers(2, 16),
         batch_size=st.integers(1, 3),
         order=st.sampled_from(["NCHW", "NHWC"]),
-        in_quantized=st.booleans(),
-        out_quantized=st.booleans(),
         weight_quantized=st.booleans(),
         share_col_buffer=st.booleans(),
         preserve_activation_sparsity=st.booleans(),
@@ -51,8 +49,6 @@ def test_dnnlowp_conv_acc16_int(
         output_channels_per_group,
         batch_size,
         order,
-        in_quantized,
-        out_quantized,
         weight_quantized,
         share_col_buffer,
         preserve_activation_sparsity,
@@ -121,8 +117,8 @@ def test_dnnlowp_conv_acc16_int(
         for op_type, engine in op_engine_list:
             net = core.Net("test_net")

-            do_quantize = "DNNLOWP" in engine and in_quantized
-            do_dequantize = "DNNLOWP" in engine and out_quantized
+            do_quantize = "DNNLOWP" in engine
+            do_dequantize = "DNNLOWP" in engine
             do_quantize_weight = (
                 "DNNLOWP" in engine and weight_quantized and len(outputs) > 0
             )
```
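These flags are consumed further down in the test body (largely outside the hunks shown here) to gate the Quantize/Dequantize operators around the conv. A rough, self-contained sketch of that pattern, with assumed blob names:

```python
from caffe2.python import core

engine = "DNNLOWP_ACC16"  # assumed; the test iterates over several engines
net = core.Net("test_net")

# After this change both flags reduce to the same engine check, so
# DNNLOWP engines are always bracketed by Quantize/Dequantize ops.
do_quantize = "DNNLOWP" in engine
do_dequantize = "DNNLOWP" in engine

if do_quantize:
    net.Proto().op.extend(
        [core.CreateOperator("Quantize", ["X"], ["X_q"], engine=engine)]
    )

# ... the Conv operator is created here with quantized in/out blobs ...

if do_dequantize:
    net.Proto().op.extend(
        [core.CreateOperator("Dequantize", ["Y_q"], ["Y"], engine=engine)]
    )
```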
```diff
@@ -166,7 +162,6 @@ def test_dnnlowp_conv_acc16_int(
                 dilation=dilation,
                 pad=pad,
                 order=order,
-                dequantize_output=not do_dequantize,
                 shared_buffer=(1 if share_col_buffer else 0),
                 preserve_activation_sparsity=preserve_activation_sparsity,
                 preserve_weight_sparsity=preserve_weight_sparsity,
@@ -210,8 +205,6 @@ def test_dnnlowp_conv_acc16_int(
         output_channels_per_group=st.integers(2, 16),
         batch_size=st.integers(1, 3),
         order=st.sampled_from(["NHWC"]),
-        in_quantized=st.booleans(),
-        out_quantized=st.booleans(),
         weight_quantized=st.booleans(),
         prepack_weight=st.booleans(),
         nbits_in_non_outlier=st.sampled_from((0, 1, 6, 8)),
@@ -232,8 +225,6 @@ def test_dnnlowp_conv_acc16_outlier(
         output_channels_per_group,
         batch_size,
         order,
-        in_quantized,
-        out_quantized,
         weight_quantized,
         prepack_weight,
         nbits_in_non_outlier,
@@ -295,8 +286,8 @@ def test_dnnlowp_conv_acc16_outlier(
             init_net = core.Net("test_init_net")
             net = core.Net("test_net")

-            do_quantize = "DNNLOWP" in engine and in_quantized
-            do_dequantize = "DNNLOWP" in engine and out_quantized
+            do_quantize = "DNNLOWP" in engine
+            do_dequantize = "DNNLOWP" in engine
             do_quantize_weight = "DNNLOWP" in engine and weight_quantized
             do_prepack_weight = "DNNLOWP" in engine and prepack_weight
@@ -357,7 +348,6 @@ def test_dnnlowp_conv_acc16_outlier(
                 dilation=dilation,
                 pad=pad,
                 order=order,
-                dequantize_output=not do_dequantize,
                 nbits_in_non_outlier=nbits_in_non_outlier,
                 shared_buffer=(1 if share_col_buffer else 0),
                 preserve_activation_sparsity=preserve_activation_sparsity,
```