
[MXNET-290] MKLDNN support for model quantization (#10433)
* mkldnn support for quantization

* fix output number in graph

* update license

* modify Jenkinsfile

* modify Jenkinsfile

* mkldnn has no int8 fc api, excluded_sym_names includes fc for cpu

* add mkldnn uint8 pass for quantization graph

* update ut

* retrigger CI

* temporarily remove non-MKLDNN quantization test

* separate mkldnn quantization ut from gpu quantization ut

* rm dev_id check for cpu

* add mkl tests dictionary

* resolve review comments

* simplify DequantizeStorageType() logic

* simplify quantize/quantized_conv storage type logic

* Add mkldnn_OIhw4i16o4i type case (needed by int8)

* INT8 conv/pooling: share with FP32 convolution/pooling class/function

* minor indent changes

* Remove unnecessary mkldnn_quantized_pooling-inl.h

* Fix minor issue

* Fix lint

* delete duplicated data type

* fix bugs and convert requantize data to NDArray

* fix lint

* fix requantize storage type

* fix requantize storage type

* Fix coding style comments

* Fix compile issue

* Change to use quantized_dtype option to support uint8/int8 scenarios

* fix gpu test quantization failure

* Fix indent

* fix quantized pooling param parser

* Fix imagenet_gen_qsym.py option style

* retrigger jenkins

* retrigger again

* trigger jenkins

* Resolve further comments

* share test code

* remove unnecessary test code

* add test_quantize_model for cpu

* add comments in quantize_graph_pass.cc

* jenkins

* jenkins

* improve coding style

* improve coding style

* Add naive CPU quantization test back and share quantization code between naive-CPU/MKLDNN/GPU

* rename test_quantization_cpu.py to test_quantization_mkldnn.py

* code style

* trigger

* Adjust variable naming for test quantization

* add qdtype for quantized op test case to test/bypass all cases explicitly

* change expressions to be consistent

* revert unnecessary change
wentingj authored and marcoabreu committed Jun 14, 2018
1 parent eb95d7b commit d79e1ad
Showing 27 changed files with 1,185 additions and 326 deletions.
3 changes: 1 addition & 2 deletions ci/docker/runtime_functions.sh
@@ -466,13 +466,12 @@ unittest_ubuntu_python3_cpu() {

unittest_ubuntu_python3_cpu_mkldnn() {
set -ex
export PYTHONPATH=./python/
# MXNET_MKLDNN_DEBUG is buggy and produces false positives
# https://github.com/apache/incubator-mxnet/issues/10026
#export MXNET_MKLDNN_DEBUG=1 # Ignored if not present
export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
nosetests-3.4 --verbose tests/python/unittest
nosetests-3.4 --verbose tests/python/quantization
nosetests-3.4 --verbose tests/python/mkl
}

44 changes: 33 additions & 11 deletions example/quantization/imagenet_gen_qsym.py
@@ -53,6 +53,7 @@ def save_params(fname, arg_params, aux_params, logger=None):

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Generate a calibrated quantized model from a FP32 model')
parser.add_argument('--ctx', type=str, default='gpu')
parser.add_argument('--model', type=str, choices=['imagenet1k-resnet-152', 'imagenet1k-inception-bn'],
help='currently only supports imagenet1k-resnet-152 or imagenet1k-inception-bn')
parser.add_argument('--batch-size', type=int, default=32)
@@ -91,8 +92,18 @@ def save_params(fname, arg_params, aux_params, logger=None):
' thresholds. This mode is expected to produce the best inference accuracy of all three'
' kinds of quantized models if the calibration dataset is representative enough of the'
' inference dataset.')
parser.add_argument('--quantized-dtype', type=str, default='int8',
choices=['int8', 'uint8'],
help='quantization destination data type for input data')
args = parser.parse_args()

if args.ctx == 'gpu':
ctx = mx.gpu(0)
elif args.ctx == 'cpu':
ctx = mx.cpu(0)
else:
raise ValueError('ctx %s is not supported in this script' % args.ctx)

logging.basicConfig()
logger = logging.getLogger('logger')
logger.setLevel(logging.INFO)
@@ -129,17 +140,26 @@ def save_params(fname, arg_params, aux_params, logger=None):
excluded_sym_names = []
if args.model == 'imagenet1k-resnet-152':
rgb_mean = '0,0,0'
calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
or name.find('sc') != -1
or name.find('fc') != -1)
if args.ctx == 'gpu':
calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
or name.find('sc') != -1
or name.find('fc') != -1)
else:
calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
or name.find('sc') != -1)
excluded_sym_names += ['flatten0', 'fc1']
if exclude_first_conv:
excluded_sym_names = ['conv0']
excluded_sym_names += ['conv0']
elif args.model == 'imagenet1k-inception-bn':
rgb_mean = '123.68,116.779,103.939'
calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
or name.find('fc') != -1)
if args.ctx == 'gpu':
calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
or name.find('fc') != -1)
else:
calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1)
excluded_sym_names += ['flatten', 'fc1']
if exclude_first_conv:
excluded_sym_names = ['conv_1']
excluded_sym_names += ['conv_1']
else:
raise ValueError('model %s is not supported in this script' % args.model)

@@ -156,8 +176,9 @@ def save_params(fname, arg_params, aux_params, logger=None):
if calib_mode == 'none':
logger.info('Quantizing FP32 model %s' % args.model)
qsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
excluded_sym_names=excluded_sym_names,
calib_mode=calib_mode, logger=logger)
ctx=ctx, excluded_sym_names=excluded_sym_names,
calib_mode=calib_mode, quantized_dtype=args.quantized_dtype,
logger=logger)
sym_name = '%s-symbol.json' % (prefix + '-quantized')
save_symbol(sym_name, qsym, logger)
else:
Expand All @@ -176,10 +197,11 @@ def save_params(fname, arg_params, aux_params, logger=None):
**mean_args)

cqsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
ctx=mx.gpu(0), excluded_sym_names=excluded_sym_names,
ctx=ctx, excluded_sym_names=excluded_sym_names,
calib_mode=calib_mode, calib_data=data,
num_calib_examples=num_calib_batches * batch_size,
calib_layer=calib_layer, logger=logger)
calib_layer=calib_layer, quantized_dtype=args.quantized_dtype,
logger=logger)
if calib_mode == 'entropy':
suffix = '-quantized-%dbatches-entropy' % num_calib_batches
elif calib_mode == 'naive':
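
For illustration only (not part of the commit): the imagenet_gen_qsym.py diff above chooses different calibration layers for CPU and GPU because MKLDNN has no INT8 fully-connected kernel, so FC outputs are left in FP32 on CPU. A minimal, self-contained Python sketch of that selection logic; make_calib_layer is a hypothetical helper name, not code from this commit.

# Illustrative sketch only -- mirrors the calib_layer lambdas in the resnet-152 branch above.
def make_calib_layer(ctx_type):
    # On GPU, conv/sc/fc outputs are calibrated; on CPU (MKLDNN), fc is skipped
    # because MKLDNN provides no INT8 FC kernel.
    keywords = ('conv', 'sc', 'fc') if ctx_type == 'gpu' else ('conv', 'sc')
    return lambda name: name.endswith('_output') and any(k in name for k in keywords)

calib_layer = make_calib_layer('cpu')
print(calib_layer('stage1_unit1_conv1_output'))  # True: convolution output gets calibrated
print(calib_layer('fc1_output'))                 # False on CPU: FC stays FP32
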
10 changes: 9 additions & 1 deletion example/quantization/imagenet_inference.py
@@ -99,6 +99,7 @@ def score(sym, arg_params, aux_params, data, devs, label_name, max_num_examples,

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Score a model on a dataset')
parser.add_argument('--ctx', type=str, default='gpu')
parser.add_argument('--symbol-file', type=str, required=True, help='symbol file path')
parser.add_argument('--param-file', type=str, required=True, help='param file path')
parser.add_argument('--batch-size', type=int, default=32)
@@ -122,6 +123,13 @@ def score(sym, arg_params, aux_params, data, devs, label_name, max_num_examples,

args = parser.parse_args()

if args.ctx == 'gpu':
ctx = mx.gpu(0)
elif args.ctx == 'cpu':
ctx = mx.cpu(0)
else:
raise ValueError('ctx %s is not supported in this script' % args.ctx)

logging.basicConfig()
logger = logging.getLogger('logger')
logger.setLevel(logging.INFO)
@@ -172,5 +180,5 @@ def score(sym, arg_params, aux_params, data, devs, label_name, max_num_examples,

num_inference_images = args.num_inference_batches * batch_size
logger.info('Running model %s for inference' % symbol_file)
score(sym, arg_params, aux_params, data, [mx.gpu(0)], label_name,
score(sym, arg_params, aux_params, data, [ctx], label_name,
max_num_examples=num_inference_images, logger=logger)
4 changes: 3 additions & 1 deletion include/mxnet/c_api.h
@@ -1431,13 +1431,15 @@ MXNET_DLL int MXSymbolInferType(SymbolHandle sym,
* \param excluded_symbols array of symbols to be excluded from being quantized
* \param num_offline number of parameters that are quantized offline
* \param offline_params array of c strings representing the names of params quantized offline
* \param quantized_dtype the quantized destination type for input data.
*/
MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle,
SymbolHandle *ret_sym_handle,
const mx_uint num_excluded_symbols,
const SymbolHandle *excluded_symbols,
const mx_uint num_offline,
const char **offline_params);
const char **offline_params,
const char *quantized_dtype);

/*!
* \brief Set calibration table to node attributes in the sym
21 changes: 17 additions & 4 deletions python/mxnet/contrib/quantization.py
@@ -72,7 +72,8 @@ def _quantize_params(qsym, params):
return quantized_params


def _quantize_symbol(sym, excluded_symbols=None, offline_params=None):
def _quantize_symbol(sym, excluded_symbols=None, offline_params=None,
quantized_dtype='int8'):
"""Given a symbol object representing a neural network of data type FP32,
quantize it into an INT8 network.
@@ -86,6 +87,8 @@ def _quantize_symbol(sym, excluded_symbols=None, offline_params=None):
Names of the parameters that users want to quantize offline. It's always recommended to
quantize parameters offline so that quantizing parameters during the inference can be
avoided.
quantized_dtype: str
The quantized destination type for input data.
"""
num_excluded_symbols = 0
excluded_handles = []
@@ -108,7 +111,8 @@ def _quantize_symbol(sym, excluded_symbols=None, offline_params=None):
mx_uint(num_excluded_symbols),
c_array(SymbolHandle, excluded_handles),
mx_uint(num_offline),
c_array(ctypes.c_char_p, offline)))
c_array(ctypes.c_char_p, offline),
c_str(quantized_dtype)))
return Symbol(out)


@@ -401,7 +405,8 @@ def _load_params(params, logger=logging):
def quantize_model(sym, arg_params, aux_params,
data_names=('data',), label_names=('softmax_label',),
ctx=cpu(), excluded_sym_names=None, calib_mode='entropy',
calib_data=None, num_calib_examples=None, calib_layer=None, logger=logging):
calib_data=None, num_calib_examples=None, calib_layer=None,
quantized_dtype='int8', logger=logging):
"""User-level API for generating a quantized model from a FP32 model w/ or w/o calibration.
The backend quantized operators are only enabled for Linux systems. Please do not run
inference using the quantized models on Windows for now.
@@ -451,6 +456,9 @@ def quantize_model(sym, arg_params, aux_params,
calibrate this layer. If yes, the statistics of the layer's output will be collected;
otherwise, no information of the layer's output will be collected. If not provided,
all the layers' outputs that need requantization will be collected.
quantized_dtype : str
The quantized destination type for input data. Currently supports 'int8'
and 'uint8'; the default value is 'int8'.
logger : Object
A logging object for printing information during the process of quantization.
@@ -473,8 +481,13 @@ def quantize_model(sym, arg_params, aux_params,
idx = nodes.list_outputs().index(sym_name + '_output')
excluded_syms.append(nodes[idx])
logger.info('Quantizing symbol')

if quantized_dtype != 'int8' and quantized_dtype != 'uint8':
raise ValueError('unknown quantized_dtype %s received,'
' expected `int8` or `uint8`' % quantized_dtype)
qsym = _quantize_symbol(sym, excluded_symbols=excluded_syms,
offline_params=list(arg_params.keys()))
offline_params=list(arg_params.keys()),
quantized_dtype=quantized_dtype)

logger.info('Quantizing parameters')
qarg_params = _quantize_params(qsym, arg_params)
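
For illustration only (not part of the commit): a minimal sketch of calling the extended quantize_model API with the new ctx and quantized_dtype arguments. The checkpoint prefix 'model', the output file name, and the excluded layer name 'fc1' are hypothetical placeholders.

# Minimal usage sketch of the extended quantize_model API (hypothetical file/layer names).
import mxnet as mx
from mxnet.contrib.quantization import quantize_model

sym, arg_params, aux_params = mx.model.load_checkpoint('model', 0)

qsym, qarg_params, qaux_params = quantize_model(
    sym=sym, arg_params=arg_params, aux_params=aux_params,
    ctx=mx.cpu(0),                  # CPU/MKLDNN path; use mx.gpu(0) for the GPU path
    excluded_sym_names=['fc1'],     # keep FC in FP32: MKLDNN has no INT8 FC kernel
    calib_mode='none',              # skip calibration, so no calib_data is required
    quantized_dtype='uint8')        # new option: 'int8' (default) or 'uint8'

qsym.save('model-quantized-symbol.json')
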
5 changes: 4 additions & 1 deletion src/c_api/c_api_symbolic.cc
@@ -577,7 +578,8 @@ int MXQuantizeSymbol(SymbolHandle sym_handle,
const mx_uint num_excluded_symbols,
const SymbolHandle *excluded_symbols,
const mx_uint num_offline,
const char **offline_params) {
const char **offline_params,
const char *quantized_dtype) {
nnvm::Symbol *s = new nnvm::Symbol();
API_BEGIN();
nnvm::Symbol *sym = static_cast<nnvm::Symbol*>(sym_handle);
@@ -594,7 +595,9 @@ int MXQuantizeSymbol(SymbolHandle sym_handle,
for (size_t i = 0; i < num_offline; ++i) {
offline.emplace(offline_params[i]);
}
std::string quantized_type(quantized_dtype);
g.attrs["offline_params"] = std::make_shared<nnvm::any>(std::move(offline));
g.attrs["quantized_dtype"] = std::make_shared<nnvm::any>(std::move(quantized_type));
g = ApplyPass(std::move(g), "QuantizeGraph");
s->outputs = g.outputs;
*ret_sym_handle = s;
2 changes: 2 additions & 0 deletions src/operator/nn/convolution-inl.h
@@ -125,6 +125,8 @@ struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
}
};

void ConvolutionParamParser(nnvm::NodeAttrs* attrs);

typedef ParamOpSign<ConvolutionParam> ConvSignature;

} // namespace op
2 changes: 1 addition & 1 deletion src/operator/nn/convolution.cc
@@ -331,7 +331,7 @@ inline static bool BackwardConvStorageType(const nnvm::NodeAttrs& attrs,
dispatch_mode, wanted_mode);
}

static void ConvolutionParamParser(nnvm::NodeAttrs* attrs) {
void ConvolutionParamParser(nnvm::NodeAttrs* attrs) {
using namespace mshadow;
ConvolutionParam param_;
try {
77 changes: 77 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_convolution-inl.h
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file mkldnn_convolution-inl.h
* \brief
*/

#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_
#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_

#if MXNET_USE_MKLDNN == 1

#include <utility>
#include "../convolution-inl.h"
#include "./mkldnn_ops-inl.h"
#include "./mkldnn_base-inl.h"

namespace mxnet {
namespace op {

mkldnn::convolution_forward::primitive_desc GetConvFwdImpl(
const ConvolutionParam& param, const bool is_train, const NDArray &data,
const NDArray &weights, const NDArray *bias, const NDArray &output);

class MKLDNNConvForward {
public:
mkldnn::convolution_forward::primitive_desc fwd_pd;

MKLDNNConvForward(const ConvolutionParam& param, const bool is_train,
const NDArray &data, const NDArray &weights,
const NDArray *bias, const NDArray &output): fwd_pd(
GetConvFwdImpl(param, is_train, data, weights, bias, output)) {
}

void SetNewMem(const mkldnn::memory &data, const mkldnn::memory &weight,
const mkldnn::memory *bias, const mkldnn::memory &output);

const mkldnn::convolution_forward &GetFwd() const {
return *fwd_;
}

private:
std::shared_ptr<mkldnn::convolution_forward> fwd_;
std::shared_ptr<mkldnn::memory> data_;
std::shared_ptr<mkldnn::memory> weight_;
std::shared_ptr<mkldnn::memory> bias_;
std::shared_ptr<mkldnn::memory> out_;
};

typedef ParamOpSign<ConvolutionParam> MKLDNNConvSignature;

MKLDNNConvForward &GetConvFwd(const nnvm::NodeAttrs& attrs,
const bool is_train, const NDArray &data, const NDArray &weights,
const NDArray *bias, const NDArray &output);

} // namespace op
} // namespace mxnet

#endif // MXNET_USE_MKLDNN == 1
#endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_

